From 65af6ee35e7800f380293d8339e2cd5e3ac33394 Mon Sep 17 00:00:00 2001
From: Martin Bubel <martin.bubel@itwm.fraunhofer.de>
Date: Mon, 16 Oct 2023 21:20:17 +0200
Subject: [PATCH] replace np.int by int

---
 GPy/kern/src/coregionalize.py                 |   97 +-
 GPy/kern/src/eq_ode1.py                       |  726 ++---
 GPy/kern/src/eq_ode2.py                       | 1756 ++++++-----
 GPy/kern/src/todo/eq_ode1.py                  |    6 +-
 .../sparse_gp_coregionalized_regression.py    |   66 +-
 GPy/models/ss_mrd.py                          |  406 ++-
 GPy/models/state_space_main.py                | 2569 +++++++++++------
 GPy/plotting/matplot_dep/base_plots.py        |  139 +-
 GPy/plotting/matplot_dep/plot_definitions.py  |  375 ++-
 GPy/testing/test_ep_likelihood.py             |    2 +-
 GPy/testing/test_likelihood.py                |    2 +-
 GPy/testing/test_model.py                     |    4 +-
 GPy/testing/test_pickle.py                    |    8 +-
 GPy/util/classification.py                    |   23 +-
 GPy/util/multioutput.py                       |   85 +-
 15 files changed, 3889 insertions(+), 2375 deletions(-)

diff --git a/GPy/kern/src/coregionalize.py b/GPy/kern/src/coregionalize.py
index d05f5c6a..7f92d4f7 100644
--- a/GPy/kern/src/coregionalize.py
+++ b/GPy/kern/src/coregionalize.py
@@ -5,13 +5,16 @@ from .kern import Kern
 import numpy as np
 from ...core.parameterization import Param
 from paramz.transformations import Logexp
-from ...util.config import config # for assesing whether to use cython
+from ...util.config import config  # for assessing whether to use cython
 
 try:
     from . import coregionalize_cython
-    use_coregionalize_cython = config.getboolean('cython', 'working')
+
+    use_coregionalize_cython = config.getboolean("cython", "working")
 except ImportError:
-    print('warning in coregionalize: failed to import cython module: falling back to numpy')
+    print(
+        "warning in coregionalize: failed to import cython module: falling back to numpy"
+    )
     use_coregionalize_cython = False
 
 
@@ -43,22 +46,34 @@ class Coregionalize(Kern):
 
     .. note: see coregionalization examples in GPy.examples.regression for some usage.
     """
-    def __init__(self, input_dim, output_dim, rank=1, W=None, kappa=None, active_dims=None, name='coregion'):
+
+    def __init__(
+        self,
+        input_dim,
+        output_dim,
+        rank=1,
+        W=None,
+        kappa=None,
+        active_dims=None,
+        name="coregion",
+    ):
         super(Coregionalize, self).__init__(input_dim, active_dims, name=name)
         self.output_dim = output_dim
         self.rank = rank
-        if self.rank>output_dim:
-            print("Warning: Unusual choice of rank, it should normally be less than the output_dim.")
+        if self.rank > output_dim:
+            print(
+                "Warning: Unusual choice of rank, it should normally be less than the output_dim."
+            )
         if W is None:
-            W = 0.5*np.random.randn(self.output_dim, self.rank)/np.sqrt(self.rank)
+            W = 0.5 * np.random.randn(self.output_dim, self.rank) / np.sqrt(self.rank)
         else:
-            assert W.shape==(self.output_dim, self.rank)
-        self.W = Param('W', W)
+            assert W.shape == (self.output_dim, self.rank)
+        self.W = Param("W", W)
         if kappa is None:
-            kappa = 0.5*np.ones(self.output_dim)
+            kappa = 0.5 * np.ones(self.output_dim)
         else:
-            assert kappa.shape==(self.output_dim, )
-        self.kappa = Param('kappa', kappa, Logexp())
+            assert kappa.shape == (self.output_dim,)
+        self.kappa = Param("kappa", kappa, Logexp())
         self.link_parameters(self.W, self.kappa)
 
     def parameters_changed(self):
@@ -70,63 +85,69 @@ class Coregionalize(Kern):
         else:
             return self._K_numpy(X, X2)
 
-
     def _K_numpy(self, X, X2=None):
-        index = np.asarray(X, dtype=np.int)
+        index = np.asarray(X, dtype=int)
         if X2 is None:
-            return self.B[index,index.T]
+            return self.B[index, index.T]
         else:
-            index2 = np.asarray(X2, dtype=np.int)
-            return self.B[index,index2.T]
+            index2 = np.asarray(X2, dtype=int)
+            return self.B[index, index2.T]
 
     def _K_cython(self, X, X2=None):
         if X2 is None:
-            return coregionalize_cython.K_symmetric(self.B, np.asarray(X, dtype=np.int64)[:,0])
-        return coregionalize_cython.K_asymmetric(self.B, np.asarray(X, dtype=np.int64)[:,0], np.asarray(X2, dtype=np.int64)[:,0])
-
+            return coregionalize_cython.K_symmetric(
+                self.B, np.asarray(X, dtype=np.int64)[:, 0]
+            )
+        return coregionalize_cython.K_asymmetric(
+            self.B,
+            np.asarray(X, dtype=np.int64)[:, 0],
+            np.asarray(X2, dtype=np.int64)[:, 0],
+        )
 
     def Kdiag(self, X):
-        return np.diag(self.B)[np.asarray(X, dtype=np.int).flatten()]
+        return np.diag(self.B)[np.asarray(X, dtype=int).flatten()]
 
     def update_gradients_full(self, dL_dK, X, X2=None):
-        index = np.asarray(X, dtype=np.int)
+        index = np.asarray(X, dtype=int)
         if X2 is None:
             index2 = index
         else:
-            index2 = np.asarray(X2, dtype=np.int)
+            index2 = np.asarray(X2, dtype=int)
 
-        #attempt to use cython for a nasty double indexing loop: fall back to numpy
+        # attempt to use cython for a nasty double indexing loop: fall back to numpy
         if use_coregionalize_cython:
             dL_dK_small = self._gradient_reduce_cython(dL_dK, index, index2)
         else:
             dL_dK_small = self._gradient_reduce_numpy(dL_dK, index, index2)
 
-
         dkappa = np.diag(dL_dK_small).copy()
         dL_dK_small += dL_dK_small.T
-        dW = (self.W[:, None, :]*dL_dK_small[:, :, None]).sum(0)
+        dW = (self.W[:, None, :] * dL_dK_small[:, :, None]).sum(0)
 
         self.W.gradient = dW
         self.kappa.gradient = dkappa
 
     def _gradient_reduce_numpy(self, dL_dK, index, index2):
-        index, index2 = index[:,0], index2[:,0]
+        index, index2 = index[:, 0], index2[:, 0]
         dL_dK_small = np.zeros_like(self.B)
         for i in range(self.output_dim):
-            tmp1 = dL_dK[index==i]
+            tmp1 = dL_dK[index == i]
             for j in range(self.output_dim):
-                dL_dK_small[j,i] = tmp1[:,index2==j].sum()
+                dL_dK_small[j, i] = tmp1[:, index2 == j].sum()
         return dL_dK_small
 
     def _gradient_reduce_cython(self, dL_dK, index, index2):
-        index, index2 = np.int64(index[:,0]), np.int64(index2[:,0])
-        return coregionalize_cython.gradient_reduce(self.B.shape[0], dL_dK, index, index2)
-
+        index, index2 = np.int64(index[:, 0]), np.int64(index2[:, 0])
+        return coregionalize_cython.gradient_reduce(
+            self.B.shape[0], dL_dK, index, index2
+        )
 
     def update_gradients_diag(self, dL_dKdiag, X):
-        index = np.asarray(X, dtype=np.int).flatten()
-        dL_dKdiag_small = np.array([dL_dKdiag[index==i].sum() for i in range(self.output_dim)])
-        self.W.gradient = 2.*self.W*dL_dKdiag_small[:, None]
+        index = np.asarray(X, dtype=int).flatten()
+        dL_dKdiag_small = np.array(
+            [dL_dKdiag[index == i].sum() for i in range(self.output_dim)]
+        )
+        self.W.gradient = 2.0 * self.W * dL_dKdiag_small[:, None]
         self.kappa.gradient = dL_dKdiag_small
 
     def gradients_X(self, dL_dK, X, X2=None):
@@ -154,8 +175,8 @@ class Coregionalize(Kern):
 
     @staticmethod
     def _build_from_input_dict(kernel_class, input_dict):
-        useGPU = input_dict.pop('useGPU', None)
+        useGPU = input_dict.pop("useGPU", None)
         # W and kappa must be converted back to numpy arrays
-        input_dict['W'] = np.array(input_dict['W'])
-        input_dict['kappa'] = np.array(input_dict['kappa'])
+        input_dict["W"] = np.array(input_dict["W"])
+        input_dict["kappa"] = np.array(input_dict["kappa"])
         return Coregionalize(**input_dict)
diff --git a/GPy/kern/src/eq_ode1.py b/GPy/kern/src/eq_ode1.py
index 9c19bead..4361ec23 100644
--- a/GPy/kern/src/eq_ode1.py
+++ b/GPy/kern/src/eq_ode1.py
@@ -8,6 +8,7 @@ from ...core.parameterization import Param
 from paramz.transformations import Logexp
 from paramz.caching import Cache_this
 
+
 class EQ_ODE1(Kern):
     """
     Covariance function for first order differential equation driven by an exponentiated quadratic covariance.
@@ -17,210 +18,236 @@ class EQ_ODE1(Kern):
        \frac{\text{d}y_j}{\text{d}t} = \sum_{i=1}^R w_{j,i} u_i(t-\delta_j) - d_jy_j(t)
 
     where :math:`R` is the rank of the system, :math:`w_{j,i}` is the sensitivity of the :math:`j`th output to the :math:`i`th latent function, :math:`d_j` is the decay rate of the :math:`j`th output and :math:`u_i(t)` are independent latent Gaussian processes goverened by an exponentiated quadratic covariance.
-    
+
     :param output_dim: number of outputs driven by latent function.
     :type output_dim: int
-    :param W: sensitivities of each output to the latent driving function. 
+    :param W: sensitivities of each output to the latent driving function.
     :type W: ndarray (output_dim x rank).
     :param rank: If rank is greater than 1 then there are assumed to be a total of rank latent forces independently driving the system, each with identical covariance.
     :type rank: int
-    :param decay: decay rates for the first order system. 
+    :param decay: decay rates for the first order system.
     :type decay: array of length output_dim.
     :param delay: delay between latent force and output response.
     :type delay: array of length output_dim.
     :param kappa: diagonal term that allows each latent output to have an independent component to the response.
     :type kappa: array of length output_dim.
-    
+
     .. Note: see first order differential equation examples in GPy.examples.regression for some usage.
     """
-    def __init__(self, input_dim=2, output_dim=1, rank=1, W = None, lengthscale=None,  decay=None, active_dims=None, name='eq_ode1'):
+
+    def __init__(
+        self,
+        input_dim=2,
+        output_dim=1,
+        rank=1,
+        W=None,
+        lengthscale=None,
+        decay=None,
+        active_dims=None,
+        name="eq_ode1",
+    ):
         assert input_dim == 2, "only defined for 1 input dims"
-        super(EQ_ODE1, self).__init__(input_dim=input_dim, active_dims=active_dims, name=name)
+        super(EQ_ODE1, self).__init__(
+            input_dim=input_dim, active_dims=active_dims, name=name
+        )
 
         self.rank = rank
         self.output_dim = output_dim
 
         if lengthscale is None:
-            lengthscale = .5 + np.random.rand(self.rank)
+            lengthscale = 0.5 + np.random.rand(self.rank)
         else:
             lengthscale = np.asarray(lengthscale)
             assert lengthscale.size in [1, self.rank], "Bad number of lengthscales"
             if lengthscale.size != self.rank:
-                lengthscale = np.ones(self.rank)*lengthscale
-            
+                lengthscale = np.ones(self.rank) * lengthscale
+
         if W is None:
-            W = .5*np.random.randn(self.output_dim, self.rank)/np.sqrt(self.rank)
+            W = 0.5 * np.random.randn(self.output_dim, self.rank) / np.sqrt(self.rank)
         else:
             assert W.shape == (self.output_dim, self.rank)
-        
+
         if decay is None:
             decay = np.ones(self.output_dim)
         else:
             decay = np.asarray(decay)
             assert decay.size in [1, self.output_dim], "Bad number of decay"
             if decay.size != self.output_dim:
-                decay = np.ones(self.output_dim)*decay
+                decay = np.ones(self.output_dim) * decay
 
-#        if kappa is None:
-#            self.kappa = np.ones(self.output_dim)
-#        else:
-#            kappa = np.asarray(kappa)
-#            assert kappa.size in [1, self.output_dim], "Bad number of kappa"
-#            if decay.size != self.output_dim:
-#                decay = np.ones(self.output_dim)*kappa
+        #        if kappa is None:
+        #            self.kappa = np.ones(self.output_dim)
+        #        else:
+        #            kappa = np.asarray(kappa)
+        #            assert kappa.size in [1, self.output_dim], "Bad number of kappa"
+        #            if decay.size != self.output_dim:
+        #                decay = np.ones(self.output_dim)*kappa
 
-        #self.kappa = Param('kappa', kappa, Logexp())
-        #self.delay = Param('delay', delay, Logexp())
-        #self.is_normalized = True
-        #self.is_stationary = False
-        #self.gaussian_initial = False
+        # self.kappa = Param('kappa', kappa, Logexp())
+        # self.delay = Param('delay', delay, Logexp())
+        # self.is_normalized = True
+        # self.is_stationary = False
+        # self.gaussian_initial = False
 
-        self.lengthscale = Param('lengthscale', lengthscale, Logexp())
-        self.decay = Param('decay', decay, Logexp())
-        self.W = Param('W', W)
+        self.lengthscale = Param("lengthscale", lengthscale, Logexp())
+        self.decay = Param("decay", decay, Logexp())
+        self.W = Param("W", W)
         self.link_parameters(self.lengthscale, self.decay, self.W)
 
     @Cache_this(limit=3)
     def K(self, X, X2=None):
-        #This way is not working, indexes are lost after using k._slice_X
-        #index = np.asarray(X, dtype=np.int)
-        #index = index.reshape(index.size,)
-        if hasattr(X, 'values'):
+        # This way is not working, indexes are lost after using k._slice_X
+        # index = np.asarray(X, dtype=int)
+        # index = index.reshape(index.size,)
+        if hasattr(X, "values"):
             X = X.values
         index = np.int_(np.round(X[:, 1]))
-        index = index.reshape(index.size,)
+        index = index.reshape(
+            index.size,
+        )
         X_flag = index[0] >= self.output_dim
         if X2 is None:
             if X_flag:
-                #Calculate covariance function for the latent functions
+                # Calculate covariance function for the latent functions
                 index -= self.output_dim
                 return self._Kuu(X, index)
             else:
                 raise NotImplementedError
         else:
-            #This way is not working, indexes are lost after using k._slice_X
-            #index2 = np.asarray(X2, dtype=np.int)
-            #index2 = index2.reshape(index2.size,)
-            if hasattr(X2, 'values'):
+            # This way is not working, indexes are lost after using k._slice_X
+            # index2 = np.asarray(X2, dtype=int)
+            # index2 = index2.reshape(index2.size,)
+            if hasattr(X2, "values"):
                 X2 = X2.values
             index2 = np.int_(np.round(X2[:, 1]))
-            index2 = index2.reshape(index2.size,)
+            index2 = index2.reshape(
+                index2.size,
+            )
             X2_flag = index2[0] >= self.output_dim
-            #Calculate cross-covariance function
+            # Calculate cross-covariance function
             if not X_flag and X2_flag:
                 index2 -= self.output_dim
-                return self._Kfu(X, index, X2, index2) #Kfu
+                return self._Kfu(X, index, X2, index2)  # Kfu
             elif X_flag and not X2_flag:
                 index -= self.output_dim
-                return self._Kfu(X2, index2, X, index).T #Kuf
+                return self._Kfu(X2, index2, X, index).T  # Kuf
             elif X_flag and X2_flag:
                 index -= self.output_dim
                 index2 -= self.output_dim
-                return self._Kusu(X, index, X2, index2) #Ku_s u
+                return self._Kusu(X, index, X2, index2)  # Ku_s u
             else:
-                raise NotImplementedError #Kf_s f
+                raise NotImplementedError  # Kf_s f
 
-    #Calculate the covariance function for diag(Kff(X,X))
+    # Calculate the covariance function for diag(Kff(X,X))
     def Kdiag(self, X):
-        if hasattr(X, 'values'):
+        if hasattr(X, "values"):
             index = np.int_(np.round(X[:, 1].values))
         else:
             index = np.int_(np.round(X[:, 1]))
-        index = index.reshape(index.size,)
+        index = index.reshape(
+            index.size,
+        )
         X_flag = index[0] >= self.output_dim
-        
-        if X_flag: #Kuudiag        
-            return np.ones(X[:,0].shape)
-        else: #Kffdiag
+
+        if X_flag:  # Kuudiag
+            return np.ones(X[:, 0].shape)
+        else:  # Kffdiag
             kdiag = self._Kdiag(X)
             return np.sum(kdiag, axis=1)
-        
+
     def _Kdiag(self, X):
-        #This way is not working, indexes are lost after using k._slice_X
-        #index = np.asarray(X, dtype=np.int)
-        #index = index.reshape(index.size,)
-        if hasattr(X, 'values'):
+        # This way is not working, indexes are lost after using k._slice_X
+        # index = np.asarray(X, dtype=int)
+        # index = index.reshape(index.size,)
+        if hasattr(X, "values"):
             X = X.values
         index = np.int_(X[:, 1])
-        index = index.reshape(index.size,)
-        
-        #terms that move along t
+        index = index.reshape(
+            index.size,
+        )
+
+        # terms that move along t
         t = X[:, 0].reshape(X.shape[0], 1)
-        d = np.unique(index) #Output Indexes
+        d = np.unique(index)  # Output Indexes
         B = self.decay.values[d]
         S = self.W.values[d, :]
-        #Index transformation
+        # Index transformation
         indd = np.arange(self.output_dim)
         indd[d] = np.arange(d.size)
         index = indd[index]
-        
+
         B = B.reshape(B.size, 1)
-        #Terms that move along q
+        # Terms that move along q
         lq = self.lengthscale.values.reshape(1, self.rank)
-        S2 = S*S
-        kdiag = np.empty((t.size, ))
+        S2 = S * S
+        kdiag = np.empty((t.size,))
 
-        #Dx1 terms
-        c0 = (S2/B)*((.5*np.sqrt(np.pi))*lq)
+        # Dx1 terms
+        c0 = (S2 / B) * ((0.5 * np.sqrt(np.pi)) * lq)
 
-        #DxQ terms
-        nu = lq*(B*.5)
-        nu2 = nu*nu
-        #Nx1 terms
-        gamt = -2.*B
-        gamt = gamt[index]*t
+        # DxQ terms
+        nu = lq * (B * 0.5)
+        nu2 = nu * nu
+        # Nx1 terms
+        gamt = -2.0 * B
+        gamt = gamt[index] * t
 
-        #NxQ terms
-        t_lq = t/lq
+        # NxQ terms
+        t_lq = t / lq
 
         # Upsilon Calculations
         # Using wofz
-        #erfnu = erf(nu)
-        
-        upm = np.exp(nu2[index, :] + lnDifErf( nu[index, :] ,t_lq+nu[index,:] ))
-        upm[t[:, 0] == 0, :] = 0.
+        # erfnu = erf(nu)
 
-        
-        upv = np.exp(nu2[index, :] + gamt + lnDifErf( -t_lq+nu[index,:], nu[index, :] ) )
-        upv[t[:, 0] == 0, :] = 0.
+        upm = np.exp(nu2[index, :] + lnDifErf(nu[index, :], t_lq + nu[index, :]))
+        upm[t[:, 0] == 0, :] = 0.0
 
-        #Covariance calculation
-        #kdiag = np.sum(c0[index, :]*(upm-upv), axis=1)
-        kdiag = c0[index, :]*(upm-upv)
+        upv = np.exp(
+            nu2[index, :] + gamt + lnDifErf(-t_lq + nu[index, :], nu[index, :])
+        )
+        upv[t[:, 0] == 0, :] = 0.0
+
+        # Covariance calculation
+        # kdiag = np.sum(c0[index, :]*(upm-upv), axis=1)
+        kdiag = c0[index, :] * (upm - upv)
         return kdiag
 
-    def update_gradients_full(self, dL_dK, X, X2 = None):
-        #index = np.asarray(X, dtype=np.int)
-        #index = index.reshape(index.size,)
-        if hasattr(X, 'values'):
+    def update_gradients_full(self, dL_dK, X, X2=None):
+        # index = np.asarray(X, dtype=int)
+        # index = index.reshape(index.size,)
+        if hasattr(X, "values"):
             X = X.values
         self.decay.gradient = np.zeros(self.decay.shape)
         self.W.gradient = np.zeros(self.W.shape)
         self.lengthscale.gradient = np.zeros(self.lengthscale.shape)
         index = np.int_(np.round(X[:, 1]))
-        index = index.reshape(index.size,)
+        index = index.reshape(
+            index.size,
+        )
         X_flag = index[0] >= self.output_dim
         if X2 is None:
-            if X_flag: #Kuu or Kmm
+            if X_flag:  # Kuu or Kmm
                 index -= self.output_dim
-                tmp = dL_dK*self._gkuu_lq(X, index)
+                tmp = dL_dK * self._gkuu_lq(X, index)
                 for q in np.unique(index):
                     ind = np.where(index == q)
                     self.lengthscale.gradient[q] = tmp[np.ix_(ind[0], ind[0])].sum()
             else:
                 raise NotImplementedError
-        else: #Kfu or Knm
-            #index2 = np.asarray(X2, dtype=np.int)
-            #index2 = index2.reshape(index2.size,)
-            if hasattr(X2, 'values'):
+        else:  # Kfu or Knm
+            # index2 = np.asarray(X2, dtype=int)
+            # index2 = index2.reshape(index2.size,)
+            if hasattr(X2, "values"):
                 X2 = X2.values
             index2 = np.int_(np.round(X2[:, 1]))
-            index2 = index2.reshape(index2.size,)
+            index2 = index2.reshape(
+                index2.size,
+            )
             X2_flag = index2[0] >= self.output_dim
-            if not X_flag and X2_flag: #Kfu
+            if not X_flag and X2_flag:  # Kfu
                 index2 -= self.output_dim
-            else: #Kuf
-                dL_dK = dL_dK.T #so we obtaing dL_Kfu
+            else:  # Kuf
+                dL_dK = dL_dK.T  # so we obtain dL_dKfu
                 indtemp = index - self.output_dim
                 Xtemp = X
                 X = X2
@@ -228,12 +255,12 @@ class EQ_ODE1(Kern):
                 index = index2
                 index2 = indtemp
             glq, gSdq, gB = self._gkfu(X, index, X2, index2)
-            tmp = dL_dK*glq
+            tmp = dL_dK * glq
             for q in np.unique(index2):
                 ind = np.where(index2 == q)
                 self.lengthscale.gradient[q] = tmp[:, ind].sum()
-            tmpB = dL_dK*gB
-            tmp = dL_dK*gSdq
+            tmpB = dL_dK * gB
+            tmp = dL_dK * gSdq
             for d in np.unique(index):
                 ind = np.where(index == d)
                 self.decay.gradient[d] = tmpB[ind, :].sum()
@@ -242,408 +269,463 @@ class EQ_ODE1(Kern):
                     self.W.gradient[d, q] = tmp[np.ix_(ind[0], ind2[0])].sum()
 
     def update_gradients_diag(self, dL_dKdiag, X):
-        #index = np.asarray(X, dtype=np.int)
-        #index = index.reshape(index.size,)
-        if hasattr(X, 'values'):
+        # index = np.asarray(X, dtype=int)
+        # index = index.reshape(index.size,)
+        if hasattr(X, "values"):
             X = X.values
         self.decay.gradient = np.zeros(self.decay.shape)
         self.W.gradient = np.zeros(self.W.shape)
         self.lengthscale.gradient = np.zeros(self.lengthscale.shape)
         index = np.int_(X[:, 1])
-        index = index.reshape(index.size,)
-        
+        index = index.reshape(
+            index.size,
+        )
+
         glq, gS, gB = self._gkdiag(X, index)
         if dL_dKdiag.size == X.shape[0]:
             dL_dKdiag = np.reshape(dL_dKdiag, (index.size, 1))
-        tmp = dL_dKdiag*glq
+        tmp = dL_dKdiag * glq
         self.lengthscale.gradient = tmp.sum(0)
-        tmpB = dL_dKdiag*gB
-        tmp = dL_dKdiag*gS
+        tmpB = dL_dKdiag * gB
+        tmp = dL_dKdiag * gS
         for d in np.unique(index):
             ind = np.where(index == d)
             self.decay.gradient[d] = tmpB[ind, :].sum()
             self.W.gradient[d, :] = tmp[ind].sum(0)
 
     def gradients_X(self, dL_dK, X, X2=None):
-        #index = np.asarray(X, dtype=np.int)
-        #index = index.reshape(index.size,)
-        if hasattr(X, 'values'):
+        # index = np.asarray(X, dtype=int)
+        # index = index.reshape(index.size,)
+        if hasattr(X, "values"):
             X = X.values
         index = np.int_(np.round(X[:, 1]))
-        index = index.reshape(index.size,)
+        index = index.reshape(
+            index.size,
+        )
         X_flag = index[0] >= self.output_dim
-        #If input_dim == 1, use this
-        #gX = np.zeros((X.shape[0], 1))
-        #Cheat to allow gradient for input_dim==2
+        # If input_dim == 1, use this
+        # gX = np.zeros((X.shape[0], 1))
+        # Cheat to allow gradient for input_dim==2
         gX = np.zeros(X.shape)
-        if X2 is None: #Kuu or Kmm
+        if X2 is None:  # Kuu or Kmm
             if X_flag:
                 index -= self.output_dim
-                gX[:, 0] = 2.*(dL_dK*self._gkuu_X(X, index)).sum(0)
+                gX[:, 0] = 2.0 * (dL_dK * self._gkuu_X(X, index)).sum(0)
                 return gX
             else:
                 raise NotImplementedError
-        else: #Kuf or Kmn
-            #index2 = np.asarray(X2, dtype=np.int)
-            #index2 = index2.reshape(index2.size,)
-            if hasattr(X2, 'values'):
+        else:  # Kuf or Kmn
+            # index2 = np.asarray(X2, dtype=int)
+            # index2 = index2.reshape(index2.size,)
+            if hasattr(X2, "values"):
                 X2 = X2.values
             index2 = np.int_(np.round(X2[:, 1]))
-            index2 = index2.reshape(index2.size,)
+            index2 = index2.reshape(
+                index2.size,
+            )
             X2_flag = index2[0] >= self.output_dim
-            if X_flag and not X2_flag: #gradient of Kuf(Z, X) wrt Z
+            if X_flag and not X2_flag:  # gradient of Kuf(Z, X) wrt Z
                 index -= self.output_dim
-                gX[:, 0] = (dL_dK*self._gkfu_z(X2, index2, X, index).T).sum(1)
+                gX[:, 0] = (dL_dK * self._gkfu_z(X2, index2, X, index).T).sum(1)
                 return gX
             else:
                 raise NotImplementedError
 
-    #---------------------------------------#
+    # ---------------------------------------#
     #             Helper functions          #
-    #---------------------------------------#
+    # ---------------------------------------#
 
-    #Evaluation of squared exponential for LFM
+    # Evaluation of squared exponential for LFM
     def _Kuu(self, X, index):
-        index = index.reshape(index.size,)
-        t = X[:, 0].reshape(X.shape[0],)
-        lq = self.lengthscale.values.reshape(self.rank,)
-        lq2 = lq*lq
-        #Covariance matrix initialization
+        index = index.reshape(
+            index.size,
+        )
+        t = X[:, 0].reshape(
+            X.shape[0],
+        )
+        lq = self.lengthscale.values.reshape(
+            self.rank,
+        )
+        lq2 = lq * lq
+        # Covariance matrix initialization
         kuu = np.zeros((t.size, t.size))
-        #Assign 1. to diagonal terms
-        kuu[np.diag_indices(t.size)] = 1.
-        #Upper triangular indices
+        # Assign 1. to diagonal terms
+        kuu[np.diag_indices(t.size)] = 1.0
+        # Upper triangular indices
         indtri1, indtri2 = np.triu_indices(t.size, 1)
-        #Block Diagonal indices among Upper Triangular indices
+        # Block Diagonal indices among Upper Triangular indices
         ind = np.where(index[indtri1] == index[indtri2])
         indr = indtri1[ind]
         indc = indtri2[ind]
         r = t[indr] - t[indc]
-        r2 = r*r
-        #Calculation of  covariance function
-        kuu[indr, indc] = np.exp(-r2/lq2[index[indr]])
-        #Completion of lower triangular part
+        r2 = r * r
+        # Calculation of  covariance function
+        kuu[indr, indc] = np.exp(-r2 / lq2[index[indr]])
+        # Completion of lower triangular part
         kuu[indc, indr] = kuu[indr, indc]
         return kuu
 
     def _Kusu(self, X, index, X2, index2):
-        index = index.reshape(index.size,)
-        index2 = index2.reshape(index2.size,)
-        t = X[:, 0].reshape(X.shape[0],1)
-        t2 = X2[:, 0].reshape(1,X2.shape[0])
-        lq = self.lengthscale.values.reshape(self.rank,)
-        #Covariance matrix initialization
+        index = index.reshape(
+            index.size,
+        )
+        index2 = index2.reshape(
+            index2.size,
+        )
+        t = X[:, 0].reshape(X.shape[0], 1)
+        t2 = X2[:, 0].reshape(1, X2.shape[0])
+        lq = self.lengthscale.values.reshape(
+            self.rank,
+        )
+        # Covariance matrix initialization
         kuu = np.zeros((t.size, t2.size))
         for q in range(self.rank):
             ind1 = index == q
             ind2 = index2 == q
-            r = t[ind1]/lq[q] - t2[0,ind2]/lq[q]
-            r2 = r*r
-            #Calculation of  covariance function
+            r = t[ind1] / lq[q] - t2[0, ind2] / lq[q]
+            r2 = r * r
+            # Calculation of  covariance function
             kuu[np.ix_(ind1, ind2)] = np.exp(-r2)
         return kuu
 
-    #Evaluation of cross-covariance function
+    # Evaluation of cross-covariance function
     def _Kfu(self, X, index, X2, index2):
-        #terms that move along t
+        # terms that move along t
         t = X[:, 0].reshape(X.shape[0], 1)
-        d = np.unique(index) #Output Indexes
+        d = np.unique(index)  # Output Indexes
         B = self.decay.values[d]
         S = self.W.values[d, :]
-        #Index transformation
+        # Index transformation
         indd = np.arange(self.output_dim)
         indd[d] = np.arange(d.size)
         index = indd[index]
-        #Output related variables must be column-wise
+        # Output related variables must be column-wise
         B = B.reshape(B.size, 1)
-        #Input related variables must be row-wise
+        # Input related variables must be row-wise
         z = X2[:, 0].reshape(1, X2.shape[0])
         lq = self.lengthscale.values.reshape((1, self.rank))
 
         kfu = np.empty((t.size, z.size))
 
-        #DxQ terms
-        c0 = S*((.5*np.sqrt(np.pi))*lq)
-        nu = B*(.5*lq)
+        # DxQ terms
+        c0 = S * ((0.5 * np.sqrt(np.pi)) * lq)
+        nu = B * (0.5 * lq)
         nu2 = nu**2
-        #1xM terms
-        z_lq = z/lq[0, index2]
-        #NxM terms
-        tz = t-z
-        tz_lq = tz/lq[0, index2]
+        # 1xM terms
+        z_lq = z / lq[0, index2]
+        # NxM terms
+        tz = t - z
+        tz_lq = tz / lq[0, index2]
 
         # Upsilon Calculations
         fullind = np.ix_(index, index2)
 
-        upsi = np.exp(nu2[fullind] - B[index]*tz + lnDifErf( -tz_lq + nu[fullind], z_lq+nu[fullind]))
-        upsi[t[:, 0] == 0, :] = 0.
-        #Covariance calculation
-        kfu = c0[fullind]*upsi
+        upsi = np.exp(
+            nu2[fullind]
+            - B[index] * tz
+            + lnDifErf(-tz_lq + nu[fullind], z_lq + nu[fullind])
+        )
+        upsi[t[:, 0] == 0, :] = 0.0
+        # Covariance calculation
+        kfu = c0[fullind] * upsi
 
         return kfu
 
-    #Gradient of Kuu wrt lengthscale
+    # Gradient of Kuu wrt lengthscale
     def _gkuu_lq(self, X, index):
-        t = X[:, 0].reshape(X.shape[0],)
-        index = index.reshape(X.shape[0],)
-        lq = self.lengthscale.values.reshape(self.rank,)
-        lq2 = lq*lq
-        #Covariance matrix initialization
+        t = X[:, 0].reshape(
+            X.shape[0],
+        )
+        index = index.reshape(
+            X.shape[0],
+        )
+        lq = self.lengthscale.values.reshape(
+            self.rank,
+        )
+        lq2 = lq * lq
+        # Covariance matrix initialization
         glq = np.zeros((t.size, t.size))
-        #Upper triangular indices
+        # Upper triangular indices
         indtri1, indtri2 = np.triu_indices(t.size, 1)
-        #Block Diagonal indices among Upper Triangular indices
+        # Block Diagonal indices among Upper Triangular indices
         ind = np.where(index[indtri1] == index[indtri2])
         indr = indtri1[ind]
         indc = indtri2[ind]
         r = t[indr] - t[indc]
-        r2 = r*r
-        r2_lq2 = r2/lq2[index[indr]]
-        #Calculation of  covariance function
+        r2 = r * r
+        r2_lq2 = r2 / lq2[index[indr]]
+        # Calculation of  covariance function
         er2_lq2 = np.exp(-r2_lq2)
-        #Gradient wrt lq
-        c = 2.*r2_lq2/lq[index[indr]]
-        glq[indr, indc] = er2_lq2*c
-        #Complete the lower triangular
+        # Gradient wrt lq
+        c = 2.0 * r2_lq2 / lq[index[indr]]
+        glq[indr, indc] = er2_lq2 * c
+        # Complete the lower triangular
         glq[indc, indr] = glq[indr, indc]
         return glq
 
-    #Be careful this derivative should be transpose it
-    def _gkuu_X(self, X, index): #Diagonal terms are always zero
-        t = X[:, 0].reshape(X.shape[0],)
-        index = index.reshape(index.size,)
-        lq = self.lengthscale.values.reshape(self.rank,)
-        lq2 = lq*lq
-        #Covariance matrix initialization
+    # Be careful this derivative should be transpose it
+    def _gkuu_X(self, X, index):  # Diagonal terms are always zero
+        t = X[:, 0].reshape(
+            X.shape[0],
+        )
+        index = index.reshape(
+            index.size,
+        )
+        lq = self.lengthscale.values.reshape(
+            self.rank,
+        )
+        lq2 = lq * lq
+        # Covariance matrix initialization
         gt = np.zeros((t.size, t.size))
-        #Upper triangular indices
-        indtri1, indtri2 = np.triu_indices(t.size, 1) #Offset of 1 from the diagonal
-        #Block Diagonal indices among Upper Triangular indices
+        # Upper triangular indices
+        indtri1, indtri2 = np.triu_indices(t.size, 1)  # Offset of 1 from the diagonal
+        # Block Diagonal indices among Upper Triangular indices
         ind = np.where(index[indtri1] == index[indtri2])
         indr = indtri1[ind]
         indc = indtri2[ind]
         r = t[indr] - t[indc]
-        r2 = r*r
-        r2_lq2 = r2/(-lq2[index[indr]])
-        #Calculation of  covariance function
+        r2 = r * r
+        r2_lq2 = r2 / (-lq2[index[indr]])
+        # Calculation of  covariance function
         er2_lq2 = np.exp(r2_lq2)
-        #Gradient wrt t
-        c = 2.*r/lq2[index[indr]]
-        gt[indr, indc] = er2_lq2*c
-        #Complete the lower triangular
+        # Gradient wrt t
+        c = 2.0 * r / lq2[index[indr]]
+        gt[indr, indc] = er2_lq2 * c
+        # Complete the lower triangular
         gt[indc, indr] = -gt[indr, indc]
         return gt
 
-    #Gradients for Diagonal Kff
+    # Gradients for Diagonal Kff
     def _gkdiag(self, X, index):
-        index = index.reshape(index.size,)
-        #terms that move along t
+        index = index.reshape(
+            index.size,
+        )
+        # terms that move along t
         d = np.unique(index)
         B = self.decay[d].values
         S = self.W[d, :].values
-        #Index transformation
+        # Index transformation
         indd = np.arange(self.output_dim)
         indd[d] = np.arange(d.size)
         index = indd[index]
-        #Output related variables must be column-wise
+        # Output related variables must be column-wise
         t = X[:, 0].reshape(X.shape[0], 1)
         B = B.reshape(B.size, 1)
-        S2 = S*S
+        S2 = S * S
 
-        #Input related variables must be row-wise
+        # Input related variables must be row-wise
         lq = self.lengthscale.values.reshape(1, self.rank)
 
         gB = np.empty((t.size,))
         glq = np.empty((t.size, lq.size))
         gS = np.empty((t.size, lq.size))
 
-        #Dx1 terms
-        c0 = S2*lq*np.sqrt(np.pi)
+        # Dx1 terms
+        c0 = S2 * lq * np.sqrt(np.pi)
 
-        #DxQ terms
-        nu = (.5*lq)*B
-        nu2 = nu*nu
-        
-        #Nx1 terms
-        gamt = -B[index]*t
+        # DxQ terms
+        nu = (0.5 * lq) * B
+        nu2 = nu * nu
+
+        # Nx1 terms
+        gamt = -B[index] * t
         egamt = np.exp(gamt)
-        e2gamt = egamt*egamt
+        e2gamt = egamt * egamt
 
-        #NxQ terms
-        t_lq = t/lq
-        t2_lq2 = -t_lq*t_lq
+        # NxQ terms
+        t_lq = t / lq
+        t2_lq2 = -t_lq * t_lq
 
-        etlq2gamt = np.exp(t2_lq2 + gamt) #NXQ
+        etlq2gamt = np.exp(t2_lq2 + gamt)  # NXQ
 
         ##Upsilon calculations
-        #erfnu = erf(nu) #TODO: This can be improved
+        # erfnu = erf(nu) #TODO: This can be improved
 
-        upm = np.exp(nu2[index, :] + lnDifErf( nu[index, :], t_lq + nu[index, :]) )
-        upm[t[:, 0] == 0, :] = 0.
+        upm = np.exp(nu2[index, :] + lnDifErf(nu[index, :], t_lq + nu[index, :]))
+        upm[t[:, 0] == 0, :] = 0.0
 
-        upv = np.exp(nu2[index, :] + 2.*gamt + lnDifErf(-t_lq + nu[index, :], nu[index, :]) ) #egamt*upv
-        upv[t[:, 0] == 0, :] = 0.
+        upv = np.exp(
+            nu2[index, :] + 2.0 * gamt + lnDifErf(-t_lq + nu[index, :], nu[index, :])
+        )  # egamt*upv
+        upv[t[:, 0] == 0, :] = 0.0
 
-        #Gradient wrt S
-        c0_S = (S/B)*(lq*np.sqrt(np.pi))
+        # Gradient wrt S
+        c0_S = (S / B) * (lq * np.sqrt(np.pi))
 
-        gS = c0_S[index]*(upm - upv)
+        gS = c0_S[index] * (upm - upv)
+
+        # For B
+        CB1 = (0.5 * lq) ** 2 - 0.5 / B**2  # DXQ
+        lq2_2B = (0.5 * lq**2) * (S2 / B)  # DXQ
+        CB2 = 2.0 * etlq2gamt - e2gamt - 1.0  # NxQ
 
-        #For B
-        CB1 = (.5*lq)**2 - .5/B**2 #DXQ
-        lq2_2B = (.5*lq**2)*(S2/B) #DXQ
-        CB2 = 2.*etlq2gamt - e2gamt - 1. #NxQ
-        
         # gradient wrt B NxZ
-        gB = c0[index, :]*(CB1[index, :]*upm - (CB1[index, :] - t/B[index])*upv) + \
-        lq2_2B[index, :]*CB2
+        gB = (
+            c0[index, :] * (CB1[index, :] * upm - (CB1[index, :] - t / B[index]) * upv)
+            + lq2_2B[index, :] * CB2
+        )
 
-        #Gradient wrt lengthscale
-        #DxQ terms
-        c0 = (.5*np.sqrt(np.pi))*(S2/B)*(1.+.5*(lq*B)**2)
-        Clq1 = S2*(lq*.5)
-        glq = c0[index]*(upm - upv) + Clq1[index]*CB2
+        # Gradient wrt lengthscale
+        # DxQ terms
+        c0 = (0.5 * np.sqrt(np.pi)) * (S2 / B) * (1.0 + 0.5 * (lq * B) ** 2)
+        Clq1 = S2 * (lq * 0.5)
+        glq = c0[index] * (upm - upv) + Clq1[index] * CB2
 
         return glq, gS, gB
 
     def _gkfu(self, X, index, Z, index2):
-        index = index.reshape(index.size,)
-        #TODO: reduce memory usage
-        #terms that move along t
+        index = index.reshape(
+            index.size,
+        )
+        # TODO: reduce memory usage
+        # terms that move along t
         d = np.unique(index)
         B = self.decay[d].values
         S = self.W[d, :].values
 
-        #Index transformation
+        # Index transformation
         indd = np.arange(self.output_dim)
         indd[d] = np.arange(d.size)
         index = indd[index]
-        #t column
+        # t column
         t = X[:, 0].reshape(X.shape[0], 1)
         B = B.reshape(B.size, 1)
-        #z row
+        # z row
         z = Z[:, 0].reshape(1, Z.shape[0])
-        index2 = index2.reshape(index2.size,)
+        index2 = index2.reshape(
+            index2.size,
+        )
         lq = self.lengthscale.values.reshape((1, self.rank))
 
-        #kfu = np.empty((t.size, z.size))
+        # kfu = np.empty((t.size, z.size))
         glq = np.empty((t.size, z.size))
         gSdq = np.empty((t.size, z.size))
         gB = np.empty((t.size, z.size))
 
-        #Dx1 terms
-        B_2 = B*.5
-        S_pi = S*(.5*np.sqrt(np.pi))
-        #DxQ terms
-        c0 = S_pi*lq #lq*Sdq*sqrt(pi)
-        nu = B*lq*.5
-        nu2 = nu*nu
+        # Dx1 terms
+        B_2 = B * 0.5
+        S_pi = S * (0.5 * np.sqrt(np.pi))
+        # DxQ terms
+        c0 = S_pi * lq  # lq*Sdq*sqrt(pi)
+        nu = B * lq * 0.5
+        nu2 = nu * nu
+
+        # 1xM terms
+        z_lq = z / lq[0, index2]
+
+        # NxM terms
+        tz = t - z
+        tz_lq = tz / lq[0, index2]
+        etz_lq2 = -np.exp(-tz_lq * tz_lq)
+        ez_lq_Bt = np.exp(-z_lq * z_lq - B[index] * t)
 
-        #1xM terms
-        z_lq = z/lq[0, index2]
-        
-        #NxM terms
-        tz = t-z
-        tz_lq = tz/lq[0, index2]
-        etz_lq2 = -np.exp(-tz_lq*tz_lq)
-        ez_lq_Bt = np.exp(-z_lq*z_lq -B[index]*t)
-        
         # Upsilon calculations
         fullind = np.ix_(index, index2)
-        upsi = np.exp(nu2[fullind] - B[index]*tz + lnDifErf( -tz_lq + nu[fullind], z_lq+nu[fullind] ) )
-        upsi[t[:, 0] == 0., :] = 0.
+        upsi = np.exp(
+            nu2[fullind]
+            - B[index] * tz
+            + lnDifErf(-tz_lq + nu[fullind], z_lq + nu[fullind])
+        )
+        upsi[t[:, 0] == 0.0, :] = 0.0
 
-        #Gradient wrt S
-        #DxQ term
-        Sa1 = lq*(.5*np.sqrt(np.pi))
+        # Gradient wrt S
+        # DxQ term
+        Sa1 = lq * (0.5 * np.sqrt(np.pi))
 
-        gSdq = Sa1[0,index2]*upsi
+        gSdq = Sa1[0, index2] * upsi
 
-        #Gradient wrt lq
-        la1 = S_pi*(1. + 2.*nu2)
-        Slq = S*lq
-        uplq = etz_lq2*(tz_lq/lq[0, index2] + B_2[index])
-        uplq += ez_lq_Bt*(-z_lq/lq[0, index2] + B_2[index])
+        # Gradient wrt lq
+        la1 = S_pi * (1.0 + 2.0 * nu2)
+        Slq = S * lq
+        uplq = etz_lq2 * (tz_lq / lq[0, index2] + B_2[index])
+        uplq += ez_lq_Bt * (-z_lq / lq[0, index2] + B_2[index])
 
-        glq = la1[fullind]*upsi
-        glq += Slq[fullind]*uplq
+        glq = la1[fullind] * upsi
+        glq += Slq[fullind] * uplq
 
-        #Gradient wrt B
-        Slq = Slq*lq
-        nulq = nu*lq
+        # Gradient wrt B
+        Slq = Slq * lq
+        nulq = nu * lq
         upBd = etz_lq2 + ez_lq_Bt
-        gB = c0[fullind]*(nulq[fullind] - tz)*upsi + .5*Slq[fullind]*upBd
+        gB = c0[fullind] * (nulq[fullind] - tz) * upsi + 0.5 * Slq[fullind] * upBd
 
         return glq, gSdq, gB
 
-    #TODO: reduce memory usage
-    def _gkfu_z(self, X, index, Z, index2): #Kfu(t,z)
-        index = index.reshape(index.size,)
-        #terms that move along t
+    # TODO: reduce memory usage
+    def _gkfu_z(self, X, index, Z, index2):  # Kfu(t,z)
+        index = index.reshape(
+            index.size,
+        )
+        # terms that move along t
         d = np.unique(index)
         B = self.decay[d].values
         S = self.W[d, :].values
-        #Index transformation
+        # Index transformation
         indd = np.arange(self.output_dim)
         indd[d] = np.arange(d.size)
         index = indd[index]
 
-        #t column
+        # t column
         t = X[:, 0].reshape(X.shape[0], 1)
         B = B.reshape(B.size, 1)
-        #z row
+        # z row
         z = Z[:, 0].reshape(1, Z.shape[0])
-        index2 = index2.reshape(index2.size,)
+        index2 = index2.reshape(
+            index2.size,
+        )
         lq = self.lengthscale.values.reshape((1, self.rank))
 
-        #kfu = np.empty((t.size, z.size))
+        # kfu = np.empty((t.size, z.size))
         gz = np.empty((t.size, z.size))
 
-        #Dx1 terms
-        S_pi =S*(.5*np.sqrt(np.pi))
-        #DxQ terms
-        #Slq = S*lq
-        c0 = S_pi*lq #lq*Sdq*sqrt(pi)
-        nu = (.5*lq)*B
-        nu2 = nu*nu
+        # Dx1 terms
+        S_pi = S * (0.5 * np.sqrt(np.pi))
+        # DxQ terms
+        # Slq = S*lq
+        c0 = S_pi * lq  # lq*Sdq*sqrt(pi)
+        nu = (0.5 * lq) * B
+        nu2 = nu * nu
 
-        #1xM terms
-        z_lq = z/lq[0, index2]
-        z_lq2 = -z_lq*z_lq
-        #NxQ terms
-        t_lq = t/lq
-        #NxM terms
+        # 1xM terms
+        z_lq = z / lq[0, index2]
+        z_lq2 = -z_lq * z_lq
+        # NxQ terms
+        t_lq = t / lq
+        # NxM terms
         zt_lq = z_lq - t_lq[:, index2]
-        zt_lq2 = -zt_lq*zt_lq
+        zt_lq2 = -zt_lq * zt_lq
 
         # Upsilon calculations
         fullind = np.ix_(index, index2)
         z2 = z_lq + nu[fullind]
         z1 = z2 - t_lq[:, index2]
-        upsi = np.exp(nu2[fullind] - B[index]*(t-z) + lnDifErf(z1,z2) )
-        upsi[t[:, 0] == 0., :] = 0.
+        upsi = np.exp(nu2[fullind] - B[index] * (t - z) + lnDifErf(z1, z2))
+        upsi[t[:, 0] == 0.0, :] = 0.0
 
-        #Gradient wrt z
-        za1 = c0*B
-        #za2 = S_w
-        gz = za1[fullind]*upsi + S[fullind]*( np.exp(z_lq2 - B[index]*t) -np.exp(zt_lq2) )
+        # Gradient wrt z
+        za1 = c0 * B
+        # za2 = S_w
+        gz = za1[fullind] * upsi + S[fullind] * (
+            np.exp(z_lq2 - B[index] * t) - np.exp(zt_lq2)
+        )
 
         return gz
-        
-def lnDifErf(z1,z2):
-    #Z2 is always positive
-    logdiferf = np.zeros(z1.shape)        
-    ind = np.where(z1>0.)
-    ind2 = np.where(z1<=0.)
+
+
+def lnDifErf(z1, z2):
+    # Z2 is always positive
+    logdiferf = np.zeros(z1.shape)
+    ind = np.where(z1 > 0.0)
+    ind2 = np.where(z1 <= 0.0)
     if ind[0].shape > 0:
         z1i = z1[ind]
-        z12 = z1i*z1i
+        z12 = z1i * z1i
         z2i = z2[ind]
-        logdiferf[ind] = -z12 + np.log(erfcx(z1i) - erfcx(z2i)*np.exp(z12-z2i**2))
-    
+        logdiferf[ind] = -z12 + np.log(erfcx(z1i) - erfcx(z2i) * np.exp(z12 - z2i**2))
+
     if ind2[0].shape > 0:
         z1i = z1[ind2]
         z2i = z2[ind2]
         logdiferf[ind2] = np.log(erf(z2i) - erf(z1i))
-        
-    return logdiferf
\ No newline at end of file
+
+    return logdiferf
diff --git a/GPy/kern/src/eq_ode2.py b/GPy/kern/src/eq_ode2.py
index 0166c511..27b15b87 100644
--- a/GPy/kern/src/eq_ode2.py
+++ b/GPy/kern/src/eq_ode2.py
@@ -8,6 +8,7 @@ from ...core.parameterization import Param
 from paramz.transformations import Logexp
 from paramz.caching import Cache_this
 
+
 class EQ_ODE2(Kern):
     """
     Covariance function for second order differential equation driven by an exponentiated quadratic covariance.
@@ -30,24 +31,38 @@ class EQ_ODE2(Kern):
     :type B: array of length output_dim.
 
     """
-    #This code will only work for the sparseGP model, due to limitations in models for this kernel
-    def __init__(self, input_dim=2, output_dim=1, rank=1, W=None, lengthscale=None, C=None, B=None, active_dims=None, name='eq_ode2'):
-        #input_dim should be 1, but kern._slice_X is not returning index information required to evaluate kernels        
+
+    # This code will only work for the sparseGP model, due to limitations in models for this kernel
+    def __init__(
+        self,
+        input_dim=2,
+        output_dim=1,
+        rank=1,
+        W=None,
+        lengthscale=None,
+        C=None,
+        B=None,
+        active_dims=None,
+        name="eq_ode2",
+    ):
+        # input_dim should be 1, but kern._slice_X is not returning index information required to evaluate kernels
         assert input_dim == 2, "only defined for 1 input dims"
-        super(EQ_ODE2, self).__init__(input_dim=input_dim, active_dims=active_dims, name=name)
+        super(EQ_ODE2, self).__init__(
+            input_dim=input_dim, active_dims=active_dims, name=name
+        )
         self.rank = rank
         self.output_dim = output_dim
 
         if lengthscale is None:
-            lengthscale = .5+np.random.rand(self.rank)
+            lengthscale = 0.5 + np.random.rand(self.rank)
         else:
             lengthscale = np.asarray(lengthscale)
             assert lengthscale.size in [1, self.rank], "Bad number of lengthscales"
             if lengthscale.size != self.rank:
-                lengthscale = np.ones(self.rank)*lengthscale
+                lengthscale = np.ones(self.rank) * lengthscale
 
         if W is None:
-            #W = 0.5*np.random.randn(self.output_dim, self.rank)/np.sqrt(self.rank)
+            # W = 0.5*np.random.randn(self.output_dim, self.rank)/np.sqrt(self.rank)
             W = np.ones((self.output_dim, self.rank))
         else:
             assert W.shape == (self.output_dim, self.rank)
@@ -58,270 +73,294 @@ class EQ_ODE2(Kern):
         if B is None:
             B = np.ones(self.output_dim)
 
-        self.C = Param('C', C, Logexp())
-        self.B = Param('B', B, Logexp())
-        self.lengthscale = Param('lengthscale', lengthscale, Logexp())
-        self.W = Param('W', W)
+        self.C = Param("C", C, Logexp())
+        self.B = Param("B", B, Logexp())
+        self.lengthscale = Param("lengthscale", lengthscale, Logexp())
+        self.W = Param("W", W)
         self.link_parameters(self.lengthscale, self.C, self.B, self.W)
 
     @Cache_this(limit=3)
     def K(self, X, X2=None):
-        #This way is not working, indexes are lost after using k._slice_X
-        #index = np.asarray(X, dtype=np.int)
-        #index = index.reshape(index.size,)
-        if hasattr(X, 'values'):
+        # This way is not working, indexes are lost after using k._slice_X
+        # index = np.asarray(X, dtype=int)
+        # index = index.reshape(index.size,)
+        if hasattr(X, "values"):
             X = X.values
         index = np.int_(np.round(X[:, 1]))
-        index = index.reshape(index.size,)
+        index = index.reshape(
+            index.size,
+        )
         X_flag = index[0] >= self.output_dim
         if X2 is None:
             if X_flag:
-                #Calculate covariance function for the latent functions
+                # Calculate covariance function for the latent functions
                 index -= self.output_dim
                 return self._Kuu(X, index)
-            else: #Kff full
+            else:  # Kff full
                 raise NotImplementedError
         else:
-            #This way is not working, indexes are lost after using k._slice_X
-            #index2 = np.asarray(X2, dtype=np.int)
-            #index2 = index2.reshape(index2.size,)
-            if hasattr(X2, 'values'):
+            # This way is not working, indexes are lost after using k._slice_X
+            # index2 = np.asarray(X2, dtype=int)
+            # index2 = index2.reshape(index2.size,)
+            if hasattr(X2, "values"):
                 X2 = X2.values
             index2 = np.int_(np.round(X2[:, 1]))
-            index2 = index2.reshape(index2.size,)
+            index2 = index2.reshape(
+                index2.size,
+            )
             X2_flag = index2[0] >= self.output_dim
-            #Calculate cross-covariance function
+            # Calculate cross-covariance function
             if not X_flag and X2_flag:
                 index2 -= self.output_dim
-                return self._Kfu(X, index, X2, index2) #Kfu
+                return self._Kfu(X, index, X2, index2)  # Kfu
             elif X_flag and not X2_flag:
                 index -= self.output_dim
-                return self._Kfu(X2, index2, X, index).T #Kuf
+                return self._Kfu(X2, index2, X, index).T  # Kuf
             elif X_flag and X2_flag:
                 index -= self.output_dim
                 index2 -= self.output_dim
-                return self._Kusu(X, index, X2, index2) #Ku_s u
+                return self._Kusu(X, index, X2, index2)  # Ku_s u
             else:
-                raise NotImplementedError #Kf_s f
+                raise NotImplementedError  # Kf_s f
 
-    #Calculate the covariance function for diag(Kff(X,X))
+    # Calculate the covariance function for diag(Kff(X,X))
     def Kdiag(self, X):
-        if hasattr(X, 'values'):
+        if hasattr(X, "values"):
             index = np.int_(np.round(X[:, 1].values))
         else:
             index = np.int_(np.round(X[:, 1]))
-        index = index.reshape(index.size,)
+        index = index.reshape(
+            index.size,
+        )
         X_flag = index[0] >= self.output_dim
-        
-        if X_flag: #Kuudiag        
-            return np.ones(X[:,0].shape)
-        else: #Kffdiag
+
+        if X_flag:  # Kuudiag
+            return np.ones(X[:, 0].shape)
+        else:  # Kffdiag
             kdiag = self._Kdiag(X)
             return np.sum(kdiag, axis=1)
 
-    #Calculate the covariance function for diag(Kff(X,X))
+    # Calculate the covariance function for diag(Kff(X,X))
     def _Kdiag(self, X):
-        #This way is not working, indexes are lost after using k._slice_X
-        #index = np.asarray(X, dtype=np.int)
-        #index = index.reshape(index.size,)
-        if hasattr(X, 'values'):
+        # This way is not working, indexes are lost after using k._slice_X
+        # index = np.asarray(X, dtype=int)
+        # index = index.reshape(index.size,)
+        if hasattr(X, "values"):
             X = X.values
         index = np.int_(X[:, 1])
-        index = index.reshape(index.size,)
-        
-        #terms that move along t
+        index = index.reshape(
+            index.size,
+        )
+
+        # terms that move along t
         t = X[:, 0].reshape(X.shape[0], 1)
-        d = np.unique(index) #Output Indexes
+        d = np.unique(index)  # Output Indexes
         B = self.B.values[d]
         C = self.C.values[d]
         S = self.W.values[d, :]
-        #Index transformation
+        # Index transformation
         indd = np.arange(self.output_dim)
         indd[d] = np.arange(d.size)
         index = indd[index]
-        #Check where wd becomes complex
-        wbool = C*C >= 4.*B
+        # Check where wd becomes complex
+        wbool = C * C >= 4.0 * B
         B = B.reshape(B.size, 1)
         C = C.reshape(C.size, 1)
-        alpha = .5*C
-        C2 = C*C
+        alpha = 0.5 * C
+        C2 = C * C
 
         wbool2 = wbool[index]
         ind2t = np.where(wbool2)
         ind3t = np.where(np.logical_not(wbool2))
 
-        #Terms that move along q
+        # Terms that move along q
         lq = self.lengthscale.values.reshape(1, self.lengthscale.size)
-        S2 = S*S
+        S2 = S * S
         kdiag = np.empty((t.size, lq.size))
 
         indD = np.arange(B.size)
-        #(1) When wd is real
+        # (1) When wd is real
         if np.any(np.logical_not(wbool)):
-            #Indexes of index and t related to (2)
+            # Indexes of index and t related to (2)
             t1 = t[ind3t]
             ind = index[ind3t]
-            d = np.asarray(np.where(np.logical_not(wbool))[0]) #Selection of outputs
+            d = np.asarray(np.where(np.logical_not(wbool))[0])  # Selection of outputs
             indd = indD.copy()
             indd[d] = np.arange(d.size)
             ind = indd[ind]
-            #Dx1 terms
-            S2lq = S2[d]*(.5*lq)
-            c0 = S2lq*np.sqrt(np.pi)
-            w = .5*np.sqrt(4.*B[d] - C2[d])
+            # Dx1 terms
+            S2lq = S2[d] * (0.5 * lq)
+            c0 = S2lq * np.sqrt(np.pi)
+            w = 0.5 * np.sqrt(4.0 * B[d] - C2[d])
             alphad = alpha[d]
-            w2 = w*w
-            gam = alphad + 1j*w
-            gamc = alphad - 1j*w
-            c1 = .5/(alphad*w2)
-            c2 = .5/(gam*w2)
+            w2 = w * w
+            gam = alphad + 1j * w
+            gamc = alphad - 1j * w
+            c1 = 0.5 / (alphad * w2)
+            c2 = 0.5 / (gam * w2)
             c = c1 - c2
-            #DxQ terms
-            nu = lq*(gam*.5)
-            K01 = c0*c
-            #Nx1 terms
-            gamt = -gam[ind]*t1
-            gamct = -gamc[ind]*t1
+            # DxQ terms
+            nu = lq * (gam * 0.5)
+            K01 = c0 * c
+            # Nx1 terms
+            gamt = -gam[ind] * t1
+            gamct = -gamc[ind] * t1
             egamt = np.exp(gamt)
-            ec = egamt*c2[ind] - np.exp(gamct)*c1[ind]
-            #NxQ terms
-            t_lq = t1/lq
+            ec = egamt * c2[ind] - np.exp(gamct) * c1[ind]
+            # NxQ terms
+            t_lq = t1 / lq
 
             # Upsilon Calculations
             # Using wofz
-            wnu = wofz(1j*nu)
+            wnu = wofz(1j * nu)
             lwnu = np.log(wnu)
-            t2_lq2 = -t_lq*t_lq
-            upm = wnu[ind] - np.exp(t2_lq2 + gamt + np.log(wofz(1j*(t_lq + nu[ind]))))
-            upm[t1[:, 0] == 0, :] = 0.
+            t2_lq2 = -t_lq * t_lq
+            upm = wnu[ind] - np.exp(t2_lq2 + gamt + np.log(wofz(1j * (t_lq + nu[ind]))))
+            upm[t1[:, 0] == 0, :] = 0.0
 
-            nu2 = nu*nu
+            nu2 = nu * nu
             z1 = nu[ind] - t_lq
-            indv1 = np.where(z1.real >= 0.)
-            indv2 = np.where(z1.real < 0.)
+            indv1 = np.where(z1.real >= 0.0)
+            indv2 = np.where(z1.real < 0.0)
             upv = -np.exp(lwnu[ind] + gamt)
             if indv1[0].shape > 0:
-                upv[indv1] += np.exp(t2_lq2[indv1] + np.log(wofz(1j*z1[indv1])))
+                upv[indv1] += np.exp(t2_lq2[indv1] + np.log(wofz(1j * z1[indv1])))
             if indv2[0].shape > 0:
-                upv[indv2] += np.exp(nu2[ind[indv2[0]], indv2[1]] + gamt[indv2[0], 0] + np.log(2.))\
-                             - np.exp(t2_lq2[indv2] + np.log(wofz(-1j*z1[indv2])))
-            upv[t1[:, 0] == 0, :] = 0.
+                upv[indv2] += np.exp(
+                    nu2[ind[indv2[0]], indv2[1]] + gamt[indv2[0], 0] + np.log(2.0)
+                ) - np.exp(t2_lq2[indv2] + np.log(wofz(-1j * z1[indv2])))
+            upv[t1[:, 0] == 0, :] = 0.0
 
-            #Covariance calculation
-            kdiag[ind3t] = np.real(K01[ind]*upm)
-            kdiag[ind3t] += np.real((c0[ind]*ec)*upv)
+            # Covariance calculation
+            kdiag[ind3t] = np.real(K01[ind] * upm)
+            kdiag[ind3t] += np.real((c0[ind] * ec) * upv)
 
-        #(2) When w_d is complex
+        # (2) When w_d is complex
         if np.any(wbool):
             t1 = t[ind2t]
             ind = index[ind2t]
-            #Index transformation
+            # Index transformation
             d = np.asarray(np.where(wbool)[0])
             indd = indD.copy()
             indd[d] = np.arange(d.size)
             ind = indd[ind]
-            #Dx1 terms
-            S2lq = S2[d]*(lq*.25)
-            c0 = S2lq*np.sqrt(np.pi)
-            w = .5*np.sqrt(C2[d] - 4.*B[d])
+            # Dx1 terms
+            S2lq = S2[d] * (lq * 0.25)
+            c0 = S2lq * np.sqrt(np.pi)
+            w = 0.5 * np.sqrt(C2[d] - 4.0 * B[d])
             alphad = alpha[d]
             gam = alphad - w
             gamc = alphad + w
-            w2 = -w*w
-            c1 = .5/(alphad*w2)
-            c21 = .5/(gam*w2)
-            c22 = .5/(gamc*w2)
+            w2 = -w * w
+            c1 = 0.5 / (alphad * w2)
+            c21 = 0.5 / (gam * w2)
+            c22 = 0.5 / (gamc * w2)
             c = c1 - c21
             c2 = c1 - c22
-            #DxQ terms
-            K011 = c0*c
-            K012 = c0*c2
-            nu = lq*(.5*gam)
-            nuc = lq*(.5*gamc)
-            #Nx1 terms
-            gamt = -gam[ind]*t1
-            gamct = -gamc[ind]*t1
+            # DxQ terms
+            K011 = c0 * c
+            K012 = c0 * c2
+            nu = lq * (0.5 * gam)
+            nuc = lq * (0.5 * gamc)
+            # Nx1 terms
+            gamt = -gam[ind] * t1
+            gamct = -gamc[ind] * t1
             egamt = np.exp(gamt)
             egamct = np.exp(gamct)
-            ec = egamt*c21[ind] - egamct*c1[ind]
-            ec2 = egamct*c22[ind] - egamt*c1[ind]
-            #NxQ terms
-            t_lq = t1/lq
+            ec = egamt * c21[ind] - egamct * c1[ind]
+            ec2 = egamct * c22[ind] - egamt * c1[ind]
+            # NxQ terms
+            t_lq = t1 / lq
 
-            #Upsilon Calculations using wofz
-            t2_lq2 = -t_lq*t_lq #Required when using wofz
-            wnu = wofz(1j*nu).real
+            # Upsilon Calculations using wofz
+            t2_lq2 = -t_lq * t_lq  # Required when using wofz
+            wnu = wofz(1j * nu).real
             lwnu = np.log(wnu)
-            upm = wnu[ind] - np.exp(t2_lq2 + gamt + np.log(wofz(1j*(t_lq + nu[ind])).real))
-            upm[t1[:, 0] == 0., :] = 0.
+            upm = wnu[ind] - np.exp(
+                t2_lq2 + gamt + np.log(wofz(1j * (t_lq + nu[ind])).real)
+            )
+            upm[t1[:, 0] == 0.0, :] = 0.0
 
-            nu2 = nu*nu
+            nu2 = nu * nu
             z1 = nu[ind] - t_lq
-            indv1 = np.where(z1 >= 0.)
-            indv2 = np.where(z1 < 0.)
+            indv1 = np.where(z1 >= 0.0)
+            indv2 = np.where(z1 < 0.0)
             upv = -np.exp(lwnu[ind] + gamt)
             if indv1[0].shape > 0:
-                upv[indv1] += np.exp(t2_lq2[indv1] + np.log(wofz(1j*z1[indv1]).real))
+                upv[indv1] += np.exp(t2_lq2[indv1] + np.log(wofz(1j * z1[indv1]).real))
             if indv2[0].shape > 0:
-                upv[indv2] += np.exp(nu2[ind[indv2[0]], indv2[1]] + gamt[indv2[0], 0] + np.log(2.))\
-                              - np.exp(t2_lq2[indv2] + np.log(wofz(-1j*z1[indv2]).real))
-            upv[t1[:, 0] == 0, :] = 0.
+                upv[indv2] += np.exp(
+                    nu2[ind[indv2[0]], indv2[1]] + gamt[indv2[0], 0] + np.log(2.0)
+                ) - np.exp(t2_lq2[indv2] + np.log(wofz(-1j * z1[indv2]).real))
+            upv[t1[:, 0] == 0, :] = 0.0
 
-            wnuc = wofz(1j*nuc).real
+            wnuc = wofz(1j * nuc).real
             lwnuc = np.log(wnuc)
 
-            upmc = wnuc[ind] - np.exp(t2_lq2 + gamct + np.log(wofz(1j*(t_lq + nuc[ind])).real))
-            upmc[t1[:, 0] == 0., :] = 0.
+            upmc = wnuc[ind] - np.exp(
+                t2_lq2 + gamct + np.log(wofz(1j * (t_lq + nuc[ind])).real)
+            )
+            upmc[t1[:, 0] == 0.0, :] = 0.0
 
-            nuc2 = nuc*nuc
+            nuc2 = nuc * nuc
             z1 = nuc[ind] - t_lq
-            indv1 = np.where(z1 >= 0.)
-            indv2 = np.where(z1 < 0.)
-            upvc = - np.exp(lwnuc[ind] + gamct)
+            indv1 = np.where(z1 >= 0.0)
+            indv2 = np.where(z1 < 0.0)
+            upvc = -np.exp(lwnuc[ind] + gamct)
             if indv1[0].shape > 0:
-                upvc[indv1] += np.exp(t2_lq2[indv1] + np.log(wofz(1j*z1[indv1]).real))
+                upvc[indv1] += np.exp(t2_lq2[indv1] + np.log(wofz(1j * z1[indv1]).real))
             if indv2[0].shape > 0:
-                upvc[indv2] += np.exp(nuc2[ind[indv2[0]], indv2[1]] + gamct[indv2[0], 0] + np.log(2.))\
-                               - np.exp(t2_lq2[indv2] + np.log(wofz(-1j*z1[indv2]).real))
-            upvc[t1[:, 0] == 0, :] = 0.
+                upvc[indv2] += np.exp(
+                    nuc2[ind[indv2[0]], indv2[1]] + gamct[indv2[0], 0] + np.log(2.0)
+                ) - np.exp(t2_lq2[indv2] + np.log(wofz(-1j * z1[indv2]).real))
+            upvc[t1[:, 0] == 0, :] = 0.0
 
-            #Covariance calculation
-            kdiag[ind2t] = K011[ind]*upm + K012[ind]*upmc + (c0[ind]*ec)*upv + (c0[ind]*ec2)*upvc
+            # Covariance calculation
+            kdiag[ind2t] = (
+                K011[ind] * upm
+                + K012[ind] * upmc
+                + (c0[ind] * ec) * upv
+                + (c0[ind] * ec2) * upvc
+            )
         return kdiag
 
-    def update_gradients_full(self, dL_dK, X, X2 = None):
-        #index = np.asarray(X, dtype=np.int)
-        #index = index.reshape(index.size,)
-        if hasattr(X, 'values'):
+    def update_gradients_full(self, dL_dK, X, X2=None):
+        # index = np.asarray(X, dtype=int)
+        # index = index.reshape(index.size,)
+        if hasattr(X, "values"):
             X = X.values
         self.B.gradient = np.zeros(self.B.shape)
         self.C.gradient = np.zeros(self.C.shape)
         self.W.gradient = np.zeros(self.W.shape)
         self.lengthscale.gradient = np.zeros(self.lengthscale.shape)
         index = np.int_(X[:, 1])
-        index = index.reshape(index.size,)
+        index = index.reshape(
+            index.size,
+        )
         X_flag = index[0] >= self.output_dim
         if X2 is None:
-            if X_flag: #Kuu or Kmm
+            if X_flag:  # Kuu or Kmm
                 index -= self.output_dim
-                tmp = dL_dK*self._gkuu_lq(X, index)
+                tmp = dL_dK * self._gkuu_lq(X, index)
                 for q in np.unique(index):
                     ind = np.where(index == q)
                     self.lengthscale.gradient[q] = tmp[np.ix_(ind[0], ind[0])].sum()
             else:
                 raise NotImplementedError
-        else: #Kfu or Knm
-            #index2 = np.asarray(X2, dtype=np.int)
-            #index2 = index2.reshape(index2.size,)
-            if hasattr(X2, 'values'):
+        else:  # Kfu or Knm
+            # index2 = np.asarray(X2, dtype=int)
+            # index2 = index2.reshape(index2.size,)
+            if hasattr(X2, "values"):
                 X2 = X2.values
             index2 = np.int_(X2[:, 1])
-            index2 = index2.reshape(index2.size,)
+            index2 = index2.reshape(
+                index2.size,
+            )
             X2_flag = index2[0] >= self.output_dim
             if not X_flag and X2_flag:
                 index2 -= self.output_dim
             else:
-                dL_dK = dL_dK.T #so we obtaing dL_Kfu
+                dL_dK = dL_dK.T  # so we obtaing dL_Kfu
                 indtemp = index - self.output_dim
                 Xtemp = X
                 X = X2
@@ -329,13 +368,13 @@ class EQ_ODE2(Kern):
                 index = index2
                 index2 = indtemp
             glq, gSdq, gB, gC = self._gkfu(X, index, X2, index2)
-            tmp = dL_dK*glq
+            tmp = dL_dK * glq
             for q in np.unique(index2):
                 ind = np.where(index2 == q)
                 self.lengthscale.gradient[q] = tmp[:, ind].sum()
-            tmpB = dL_dK*gB
-            tmpC = dL_dK*gC
-            tmp = dL_dK*gSdq
+            tmpB = dL_dK * gB
+            tmpC = dL_dK * gC
+            tmp = dL_dK * gSdq
             for d in np.unique(index):
                 ind = np.where(index == d)
                 self.B.gradient[d] = tmpB[ind, :].sum()
@@ -345,25 +384,27 @@ class EQ_ODE2(Kern):
                     self.W.gradient[d, q] = tmp[np.ix_(ind[0], ind2[0])].sum()
 
     def update_gradients_diag(self, dL_dKdiag, X):
-        #index = np.asarray(X, dtype=np.int)
-        #index = index.reshape(index.size,)
-        if hasattr(X, 'values'):
+        # index = np.asarray(X, dtype=int)
+        # index = index.reshape(index.size,)
+        if hasattr(X, "values"):
             X = X.values
         self.B.gradient = np.zeros(self.B.shape)
         self.C.gradient = np.zeros(self.C.shape)
         self.W.gradient = np.zeros(self.W.shape)
         self.lengthscale.gradient = np.zeros(self.lengthscale.shape)
         index = np.int_(X[:, 1])
-        index = index.reshape(index.size,)
-        
+        index = index.reshape(
+            index.size,
+        )
+
         glq, gS, gB, gC = self._gkdiag(X, index)
         if dL_dKdiag.size == X.shape[0]:
             dL_dKdiag = np.reshape(dL_dKdiag, (index.size, 1))
-        tmp = dL_dKdiag*glq
+        tmp = dL_dKdiag * glq
         self.lengthscale.gradient = tmp.sum(0)
-        tmpB = dL_dKdiag*gB
-        tmpC = dL_dKdiag*gC
-        tmp = dL_dKdiag*gS
+        tmpB = dL_dKdiag * gB
+        tmpC = dL_dKdiag * gC
+        tmp = dL_dKdiag * gS
         for d in np.unique(index):
             ind = np.where(index == d)
             self.B.gradient[d] = tmpB[ind, :].sum()
@@ -371,107 +412,123 @@ class EQ_ODE2(Kern):
             self.W.gradient[d, :] = tmp[ind].sum(0)
 
     def gradients_X(self, dL_dK, X, X2=None):
-        #index = np.asarray(X, dtype=np.int)
-        #index = index.reshape(index.size,)
-        if hasattr(X, 'values'):
+        # index = np.asarray(X, dtype=int)
+        # index = index.reshape(index.size,)
+        if hasattr(X, "values"):
             X = X.values
         index = np.int_(X[:, 1])
-        index = index.reshape(index.size,)
+        index = index.reshape(
+            index.size,
+        )
         X_flag = index[0] >= self.output_dim
-        #If input_dim == 1, use this
-        #gX = np.zeros((X.shape[0], 1))
-        #Cheat to allow gradient for input_dim==2
+        # If input_dim == 1, use this
+        # gX = np.zeros((X.shape[0], 1))
+        # Cheat to allow gradient for input_dim==2
         gX = np.zeros(X.shape)
-        if X2 is None: #Kuu or Kmm
+        if X2 is None:  # Kuu or Kmm
             if X_flag:
                 index -= self.output_dim
-                gX[:, 0] = 2.*(dL_dK*self._gkuu_X(X, index)).sum(0)
+                gX[:, 0] = 2.0 * (dL_dK * self._gkuu_X(X, index)).sum(0)
                 return gX
             else:
                 raise NotImplementedError
-        else: #Kuf or Kmn
-            #index2 = np.asarray(X2, dtype=np.int)
-            #index2 = index2.reshape(index2.size,)
-            if hasattr(X2, 'values'):
+        else:  # Kuf or Kmn
+            # index2 = np.asarray(X2, dtype=int)
+            # index2 = index2.reshape(index2.size,)
+            if hasattr(X2, "values"):
                 X2 = X2.values
             index2 = np.int_(X2[:, 1])
-            index2 = index2.reshape(index2.size,)
+            index2 = index2.reshape(
+                index2.size,
+            )
             X2_flag = index2[0] >= self.output_dim
-            if X_flag and not X2_flag: #gradient of Kuf(Z, X) wrt Z
+            if X_flag and not X2_flag:  # gradient of Kuf(Z, X) wrt Z
                 index -= self.output_dim
-                gX[:, 0] = (dL_dK*self._gkfu_z(X2, index2, X, index).T).sum(1)
+                gX[:, 0] = (dL_dK * self._gkfu_z(X2, index2, X, index).T).sum(1)
                 return gX
             else:
                 raise NotImplementedError
 
-    #---------------------------------------#
+    # ---------------------------------------#
     #             Helper functions          #
-    #---------------------------------------#
+    # ---------------------------------------#
 
-    #Evaluation of squared exponential for LFM
+    # Evaluation of squared exponential for LFM
     def _Kuu(self, X, index):
-        index = index.reshape(index.size,)
-        t = X[:, 0].reshape(X.shape[0],)
-        lq = self.lengthscale.values.reshape(self.rank,)
-        lq2 = lq*lq
-        #Covariance matrix initialization
+        index = index.reshape(
+            index.size,
+        )
+        t = X[:, 0].reshape(
+            X.shape[0],
+        )
+        lq = self.lengthscale.values.reshape(
+            self.rank,
+        )
+        lq2 = lq * lq
+        # Covariance matrix initialization
         kuu = np.zeros((t.size, t.size))
-        #Assign 1. to diagonal terms
-        kuu[np.diag_indices(t.size)] = 1.
-        #Upper triangular indices
+        # Assign 1. to diagonal terms
+        kuu[np.diag_indices(t.size)] = 1.0
+        # Upper triangular indices
         indtri1, indtri2 = np.triu_indices(t.size, 1)
-        #Block Diagonal indices among Upper Triangular indices
+        # Block Diagonal indices among Upper Triangular indices
         ind = np.where(index[indtri1] == index[indtri2])
         indr = indtri1[ind]
         indc = indtri2[ind]
         r = t[indr] - t[indc]
-        r2 = r*r
-        #Calculation of  covariance function
-        kuu[indr, indc] = np.exp(-r2/lq2[index[indr]])
-        #Completation of lower triangular part
+        r2 = r * r
+        # Calculation of  covariance function
+        kuu[indr, indc] = np.exp(-r2 / lq2[index[indr]])
+        # Completion of lower triangular part
         kuu[indc, indr] = kuu[indr, indc]
         return kuu
 
     def _Kusu(self, X, index, X2, index2):
-        index = index.reshape(index.size,)
-        index2 = index2.reshape(index2.size,)
-        t = X[:, 0].reshape(X.shape[0],1)
-        t2 = X2[:, 0].reshape(1,X2.shape[0])
-        lq = self.lengthscale.values.reshape(self.rank,)
-        #Covariance matrix initialization
+        index = index.reshape(
+            index.size,
+        )
+        index2 = index2.reshape(
+            index2.size,
+        )
+        t = X[:, 0].reshape(X.shape[0], 1)
+        t2 = X2[:, 0].reshape(1, X2.shape[0])
+        lq = self.lengthscale.values.reshape(
+            self.rank,
+        )
+        # Covariance matrix initialization
         kuu = np.zeros((t.size, t2.size))
         for q in range(self.rank):
             ind1 = index == q
             ind2 = index2 == q
-            r = t[ind1]/lq[q] - t2[0,ind2]/lq[q]
-            r2 = r*r
-            #Calculation of  covariance function
+            r = t[ind1] / lq[q] - t2[0, ind2] / lq[q]
+            r2 = r * r
+            # Calculation of  covariance function
             kuu[np.ix_(ind1, ind2)] = np.exp(-r2)
         return kuu
 
-    #Evaluation of cross-covariance function
+    # Evaluation of cross-covariance function
     def _Kfu(self, X, index, X2, index2):
-        #terms that move along t
+        # terms that move along t
         t = X[:, 0].reshape(X.shape[0], 1)
-        d = np.unique(index) #Output Indexes
+        d = np.unique(index)  # Output Indexes
         B = self.B.values[d]
         C = self.C.values[d]
         S = self.W.values[d, :]
-        #Index transformation
+        # Index transformation
         indd = np.arange(self.output_dim)
         indd[d] = np.arange(d.size)
         index = indd[index]
-        #Check where wd becomes complex
-        wbool = C*C >= 4.*B
-        #Output related variables must be column-wise
+        # Check where wd becomes complex
+        wbool = C * C >= 4.0 * B
+        # Output related variables must be column-wise
         C = C.reshape(C.size, 1)
         B = B.reshape(B.size, 1)
-        C2 = C*C
-        #Input related variables must be row-wise
+        C2 = C * C
+        # Input related variables must be row-wise
         z = X2[:, 0].reshape(1, X2.shape[0])
         lq = self.lengthscale.values.reshape((1, self.rank))
-        #print np.max(z), np.max(z/lq[0, index2])
-        alpha = .5*C
+        # print np.max(z), np.max(z/lq[0, index2])
+        alpha = 0.5 * C
 
         wbool2 = wbool[index]
         ind2t = np.where(wbool2)
@@ -480,196 +537,214 @@ class EQ_ODE2(Kern):
         kfu = np.empty((t.size, z.size))
 
         indD = np.arange(B.size)
-        #(1) when wd is real
+        # (1) when wd is real
         if np.any(np.logical_not(wbool)):
-            #Indexes of index and t related to (2)
+            # Indexes of index and t related to (1)
             t1 = t[ind3t]
             ind = index[ind3t]
-            #Index transformation
+            # Index transformation
             d = np.asarray(np.where(np.logical_not(wbool))[0])
             indd = indD.copy()
             indd[d] = np.arange(d.size)
             ind = indd[ind]
-            #Dx1 terms
-            w = .5*np.sqrt(4.*B[d] - C2[d])
+            # Dx1 terms
+            w = 0.5 * np.sqrt(4.0 * B[d] - C2[d])
             alphad = alpha[d]
-            gam = alphad - 1j*w
+            gam = alphad - 1j * w
 
-            #DxQ terms
-            Slq = (S[d]/w)*(.5*lq)
-            c0 = Slq*np.sqrt(np.pi)
-            nu = gam*(.5*lq)
-            #1xM terms
-            z_lq = z/lq[0, index2]
-            #NxQ terms
-            t_lq = t1/lq
-            #NxM terms
+            # DxQ terms
+            Slq = (S[d] / w) * (0.5 * lq)
+            c0 = Slq * np.sqrt(np.pi)
+            nu = gam * (0.5 * lq)
+            # 1xM terms
+            z_lq = z / lq[0, index2]
+            # NxQ terms
+            t_lq = t1 / lq
+            # NxM terms
             zt_lq = z_lq - t_lq[:, index2]
 
             # Upsilon Calculations
-            #Using wofz
-            tz = t1-z
+            # Using wofz
+            tz = t1 - z
             fullind = np.ix_(ind, index2)
-            zt_lq2 = -zt_lq*zt_lq
-            z_lq2 = -z_lq*z_lq
-            gamt = -gam[ind]*t1
+            zt_lq2 = -zt_lq * zt_lq
+            z_lq2 = -z_lq * z_lq
+            gamt = -gam[ind] * t1
 
-            upsi = - np.exp(z_lq2 + gamt + np.log(wofz(1j*(z_lq + nu[fullind]))))
+            upsi = -np.exp(z_lq2 + gamt + np.log(wofz(1j * (z_lq + nu[fullind]))))
             z1 = zt_lq + nu[fullind]
-            indv1 = np.where(z1.real >= 0.)
-            indv2 = np.where(z1.real < 0.)
+            indv1 = np.where(z1.real >= 0.0)
+            indv2 = np.where(z1.real < 0.0)
             if indv1[0].shape > 0:
-                upsi[indv1] += np.exp(zt_lq2[indv1] + np.log(wofz(1j*z1[indv1])))
+                upsi[indv1] += np.exp(zt_lq2[indv1] + np.log(wofz(1j * z1[indv1])))
             if indv2[0].shape > 0:
-                nua2 = nu[ind[indv2[0]], index2[indv2[1]]]**2
-                upsi[indv2] += np.exp(nua2 - gam[ind[indv2[0]], 0]*tz[indv2] + np.log(2.))\
-                               - np.exp(zt_lq2[indv2] + np.log(wofz(-1j*z1[indv2])))
-            upsi[t1[:, 0] == 0., :] = 0.
+                nua2 = nu[ind[indv2[0]], index2[indv2[1]]] ** 2
+                upsi[indv2] += np.exp(
+                    nua2 - gam[ind[indv2[0]], 0] * tz[indv2] + np.log(2.0)
+                ) - np.exp(zt_lq2[indv2] + np.log(wofz(-1j * z1[indv2])))
+            upsi[t1[:, 0] == 0.0, :] = 0.0
 
-            #Covariance calculation
-            kfu[ind3t] = c0[fullind]*upsi.imag
+            # Covariance calculation
+            kfu[ind3t] = c0[fullind] * upsi.imag
 
-        #(2) when wd is complex
+        # (2) when wd is complex
         if np.any(wbool):
-            #Indexes of index and t related to (2)
+            # Indexes of index and t related to (2)
             t1 = t[ind2t]
             ind = index[ind2t]
-            #Index transformation
+            # Index transformation
             d = np.asarray(np.where(wbool)[0])
             indd = indD.copy()
             indd[d] = np.arange(d.size)
             ind = indd[ind]
-            #Dx1 terms
-            w = .5*np.sqrt(C2[d] - 4.*B[d])
+            # Dx1 terms
+            w = 0.5 * np.sqrt(C2[d] - 4.0 * B[d])
             alphad = alpha[d]
             gam = alphad - w
             gamc = alphad + w
-            #DxQ terms
-            Slq = S[d]*(lq*.25)
-            c0 = -Slq*(np.sqrt(np.pi)/w)
-            nu = gam*(lq*.5)
-            nuc = gamc*(lq*.5)
-            #1xM terms
-            z_lq = z/lq[0, index2]
-            #NxQ terms
-            t_lq = t1/lq[0, index2]
-            #NxM terms
+            # DxQ terms
+            Slq = S[d] * (lq * 0.25)
+            c0 = -Slq * (np.sqrt(np.pi) / w)
+            nu = gam * (lq * 0.5)
+            nuc = gamc * (lq * 0.5)
+            # 1xM terms
+            z_lq = z / lq[0, index2]
+            # NxQ terms
+            t_lq = t1 / lq[0, index2]
+            # NxM terms
             zt_lq = z_lq - t_lq
 
             # Upsilon Calculations
-            tz = t1-z
-            z_lq2 = -z_lq*z_lq
-            zt_lq2 = -zt_lq*zt_lq
-            gamt = -gam[ind]*t1
-            gamct = -gamc[ind]*t1
+            tz = t1 - z
+            z_lq2 = -z_lq * z_lq
+            zt_lq2 = -zt_lq * zt_lq
+            gamt = -gam[ind] * t1
+            gamct = -gamc[ind] * t1
             fullind = np.ix_(ind, index2)
-            upsi = np.exp(z_lq2 + gamt + np.log(wofz(1j*(z_lq + nu[fullind])).real))\
-                   - np.exp(z_lq2 + gamct + np.log(wofz(1j*(z_lq + nuc[fullind])).real))
+            upsi = np.exp(
+                z_lq2 + gamt + np.log(wofz(1j * (z_lq + nu[fullind])).real)
+            ) - np.exp(z_lq2 + gamct + np.log(wofz(1j * (z_lq + nuc[fullind])).real))
 
             z1 = zt_lq + nu[fullind]
-            indv1 = np.where(z1 >= 0.)
-            indv2 = np.where(z1 < 0.)
+            indv1 = np.where(z1 >= 0.0)
+            indv2 = np.where(z1 < 0.0)
             if indv1[0].shape > 0:
-                upsi[indv1] -= np.exp(zt_lq2[indv1] + np.log(wofz(1j*z1[indv1]).real))
+                upsi[indv1] -= np.exp(zt_lq2[indv1] + np.log(wofz(1j * z1[indv1]).real))
             if indv2[0].shape > 0:
-                nua2 = nu[ind[indv2[0]], index2[indv2[1]]]**2
-                upsi[indv2] -= np.exp(nua2 - gam[ind[indv2[0]], 0]*tz[indv2] + np.log(2.))\
-                               - np.exp(zt_lq2[indv2] + np.log(wofz(-1j*z1[indv2]).real))
+                nua2 = nu[ind[indv2[0]], index2[indv2[1]]] ** 2
+                upsi[indv2] -= np.exp(
+                    nua2 - gam[ind[indv2[0]], 0] * tz[indv2] + np.log(2.0)
+                ) - np.exp(zt_lq2[indv2] + np.log(wofz(-1j * z1[indv2]).real))
             z1 = zt_lq + nuc[fullind]
-            indv1 = np.where(z1 >= 0.)
-            indv2 = np.where(z1 < 0.)
+            indv1 = np.where(z1 >= 0.0)
+            indv2 = np.where(z1 < 0.0)
             if indv1[0].shape > 0:
-                upsi[indv1] += np.exp(zt_lq2[indv1] + np.log(wofz(1j*z1[indv1]).real))
+                upsi[indv1] += np.exp(zt_lq2[indv1] + np.log(wofz(1j * z1[indv1]).real))
             if indv2[0].shape > 0:
-                nuac2 = nuc[ind[indv2[0]], index2[indv2[1]]]**2
-                upsi[indv2] += np.exp(nuac2 - gamc[ind[indv2[0]], 0]*tz[indv2] + np.log(2.))\
-                               - np.exp(zt_lq2[indv2] + np.log(wofz(-1j*z1[indv2]).real))
-            upsi[t1[:, 0] == 0., :] = 0.
+                nuac2 = nuc[ind[indv2[0]], index2[indv2[1]]] ** 2
+                upsi[indv2] += np.exp(
+                    nuac2 - gamc[ind[indv2[0]], 0] * tz[indv2] + np.log(2.0)
+                ) - np.exp(zt_lq2[indv2] + np.log(wofz(-1j * z1[indv2]).real))
+            upsi[t1[:, 0] == 0.0, :] = 0.0
 
-            kfu[ind2t] = c0[np.ix_(ind, index2)]*upsi
+            kfu[ind2t] = c0[np.ix_(ind, index2)] * upsi
         return kfu
 
-    #Gradient of Kuu wrt lengthscale
+    # Gradient of Kuu wrt lengthscale
     def _gkuu_lq(self, X, index):
-        t = X[:, 0].reshape(X.shape[0],)
-        index = index.reshape(X.shape[0],)
-        lq = self.lengthscale.values.reshape(self.rank,)
-        lq2 = lq*lq
-        #Covariance matrix initialization
+        t = X[:, 0].reshape(
+            X.shape[0],
+        )
+        index = index.reshape(
+            X.shape[0],
+        )
+        lq = self.lengthscale.values.reshape(
+            self.rank,
+        )
+        lq2 = lq * lq
+        # Covariance matrix initialization
         glq = np.zeros((t.size, t.size))
-        #Upper triangular indices
+        # Upper triangular indices
         indtri1, indtri2 = np.triu_indices(t.size, 1)
-        #Block Diagonal indices among Upper Triangular indices
+        # Block Diagonal indices among Upper Triangular indices
         ind = np.where(index[indtri1] == index[indtri2])
         indr = indtri1[ind]
         indc = indtri2[ind]
         r = t[indr] - t[indc]
-        r2 = r*r
-        r2_lq2 = r2/lq2[index[indr]]
-        #Calculation of  covariance function
+        r2 = r * r
+        r2_lq2 = r2 / lq2[index[indr]]
+        # Calculation of  covariance function
         er2_lq2 = np.exp(-r2_lq2)
-        #Gradient wrt lq
-        c = 2.*r2_lq2/lq[index[indr]]
-        glq[indr, indc] = er2_lq2*c
-        #Complete the lower triangular
+        # Gradient wrt lq
+        c = 2.0 * r2_lq2 / lq[index[indr]]
+        glq[indr, indc] = er2_lq2 * c
+        # Complete the lower triangular
         glq[indc, indr] = glq[indr, indc]
         return glq
 
-    #Be careful this derivative should be transpose it
-    def _gkuu_X(self, X, index): #Diagonal terms are always zero
-        t = X[:, 0].reshape(X.shape[0],)
-        index = index.reshape(index.size,)
-        lq = self.lengthscale.values.reshape(self.rank,)
-        lq2 = lq*lq
-        #Covariance matrix initialization
+    # Be careful: this derivative should be transposed
+    def _gkuu_X(self, X, index):  # Diagonal terms are always zero
+        t = X[:, 0].reshape(
+            X.shape[0],
+        )
+        index = index.reshape(
+            index.size,
+        )
+        lq = self.lengthscale.values.reshape(
+            self.rank,
+        )
+        lq2 = lq * lq
+        # Covariance matrix initialization
         gt = np.zeros((t.size, t.size))
-        #Upper triangular indices
-        indtri1, indtri2 = np.triu_indices(t.size, 1) #Offset of 1 from the diagonal
-        #Block Diagonal indices among Upper Triangular indices
+        # Upper triangular indices
+        indtri1, indtri2 = np.triu_indices(t.size, 1)  # Offset of 1 from the diagonal
+        # Block Diagonal indices among Upper Triangular indices
         ind = np.where(index[indtri1] == index[indtri2])
         indr = indtri1[ind]
         indc = indtri2[ind]
         r = t[indr] - t[indc]
-        r2 = r*r
-        r2_lq2 = r2/(-lq2[index[indr]])
-        #Calculation of  covariance function
+        r2 = r * r
+        r2_lq2 = r2 / (-lq2[index[indr]])
+        # Calculation of  covariance function
         er2_lq2 = np.exp(r2_lq2)
-        #Gradient wrt t
-        c = 2.*r/lq2[index[indr]]
-        gt[indr, indc] = er2_lq2*c
-        #Complete the lower triangular
+        # Gradient wrt t
+        c = 2.0 * r / lq2[index[indr]]
+        gt[indr, indc] = er2_lq2 * c
+        # Complete the lower triangular
         gt[indc, indr] = -gt[indr, indc]
         return gt
 
-    #Gradients for Diagonal Kff
+    # Gradients for Diagonal Kff
     def _gkdiag(self, X, index):
-        index = index.reshape(index.size,)
-        #terms that move along t
+        index = index.reshape(
+            index.size,
+        )
+        # terms that move along t
         d = np.unique(index)
         B = self.B[d].values
         C = self.C[d].values
         S = self.W[d, :].values
-        #Index transformation
+        # Index transformation
         indd = np.arange(self.output_dim)
         indd[d] = np.arange(d.size)
         index = indd[index]
-        #Check where wd becomes complex
-        wbool = C*C >= 4.*B
-        #Output related variables must be column-wise
+        # Check where wd becomes complex
+        wbool = C * C >= 4.0 * B
+        # Output related variables must be column-wise
         t = X[:, 0].reshape(X.shape[0], 1)
         B = B.reshape(B.size, 1)
         C = C.reshape(C.size, 1)
-        alpha = .5*C
-        C2 = C*C
-        S2 = S*S
+        alpha = 0.5 * C
+        C2 = C * C
+        S2 = S * S
 
         wbool2 = wbool[index]
         ind2t = np.where(wbool2)
         ind3t = np.where(np.logical_not(wbool2))
 
-        #Input related variables must be row-wise
+        # Input related variables must be row-wise
         lq = self.lengthscale.values.reshape(1, self.rank)
-        lq2 = lq*lq
+        lq2 = lq * lq
 
         gB = np.empty((t.size, lq.size))
         gC = np.empty((t.size, lq.size))
@@ -677,694 +752,851 @@ class EQ_ODE2(Kern):
         gS = np.empty((t.size, lq.size))
 
         indD = np.arange(B.size)
-        #(1) When wd is real
+        # (1) When wd is real
         if np.any(np.logical_not(wbool)):
-            #Indexes of index and t related to (1)
+            # Indexes of index and t related to (1)
             t1 = t[ind3t]
             ind = index[ind3t]
-            #Index transformation
+            # Index transformation
             d = np.asarray(np.where(np.logical_not(wbool))[0])
             indd = indD.copy()
             indd[d] = np.arange(d.size)
             ind = indd[ind]
-            #Dx1 terms
-            S2lq = S2[d]*(.5*lq)
-            c0 = S2lq*np.sqrt(np.pi)
+            # Dx1 terms
+            S2lq = S2[d] * (0.5 * lq)
+            c0 = S2lq * np.sqrt(np.pi)
 
-            w = .5*np.sqrt(4.*B[d] - C2[d])
+            w = 0.5 * np.sqrt(4.0 * B[d] - C2[d])
             alphad = alpha[d]
-            alpha2 = alphad*alphad
-            w2 = w*w
-            gam = alphad + 1j*w
-            gam2 = gam*gam
-            gamc = alphad - 1j*w
-            c1 = 0.5/alphad
-            c2 = 0.5/gam
+            alpha2 = alphad * alphad
+            w2 = w * w
+            gam = alphad + 1j * w
+            gam2 = gam * gam
+            gamc = alphad - 1j * w
+            c1 = 0.5 / alphad
+            c2 = 0.5 / gam
             c = c1 - c2
 
-            #DxQ terms
-            c0 = c0/w2
-            nu = (.5*lq)*gam
-            #Nx1 terms
-            gamt = -gam[ind]*t1
-            gamct = -gamc[ind]*t1
+            # DxQ terms
+            c0 = c0 / w2
+            nu = (0.5 * lq) * gam
+            # Nx1 terms
+            gamt = -gam[ind] * t1
+            gamct = -gamc[ind] * t1
             egamt = np.exp(gamt)
             egamct = np.exp(gamct)
-            ec = egamt*c2[ind] - egamct*c1[ind]
+            ec = egamt * c2[ind] - egamct * c1[ind]
 
-            #NxQ terms
-            t_lq = t1/lq
-            t2_lq2 = -t_lq*t_lq
-            t_lq2 = t_lq/lq
+            # NxQ terms
+            t_lq = t1 / lq
+            t2_lq2 = -t_lq * t_lq
+            t_lq2 = t_lq / lq
 
             et2_lq2 = np.exp(t2_lq2)
             etlq2gamt = np.exp(t2_lq2 + gamt)
 
             ##Upsilon calculations
-            #Using wofz
-            wnu = wofz(1j*nu)
+            # Using wofz
+            wnu = wofz(1j * nu)
             lwnu = np.log(wnu)
-            t2_lq2 = -t_lq*t_lq
-            upm = wnu[ind] - np.exp(t2_lq2 + gamt + np.log(wofz(1j*(t_lq + nu[ind]))))
-            upm[t1[:, 0] == 0, :] = 0.
+            t2_lq2 = -t_lq * t_lq
+            upm = wnu[ind] - np.exp(t2_lq2 + gamt + np.log(wofz(1j * (t_lq + nu[ind]))))
+            upm[t1[:, 0] == 0, :] = 0.0
 
-            nu2 = nu*nu
+            nu2 = nu * nu
             z1 = nu[ind] - t_lq
-            indv1 = np.where(z1.real >= 0.)
-            indv2 = np.where(z1.real < 0.)
+            indv1 = np.where(z1.real >= 0.0)
+            indv2 = np.where(z1.real < 0.0)
             upv = -np.exp(lwnu[ind] + gamt)
             if indv1[0].shape > 0:
-                upv[indv1] += np.exp(t2_lq2[indv1] + np.log(wofz(1j*z1[indv1])))
+                upv[indv1] += np.exp(t2_lq2[indv1] + np.log(wofz(1j * z1[indv1])))
             if indv2[0].shape > 0:
-                upv[indv2] += np.exp(nu2[ind[indv2[0]], indv2[1]] + gamt[indv2[0], 0] + np.log(2.))\
-                             - np.exp(t2_lq2[indv2] + np.log(wofz(-1j*z1[indv2])))
-            upv[t1[:, 0] == 0, :] = 0.
+                upv[indv2] += np.exp(
+                    nu2[ind[indv2[0]], indv2[1]] + gamt[indv2[0], 0] + np.log(2.0)
+                ) - np.exp(t2_lq2[indv2] + np.log(wofz(-1j * z1[indv2])))
+            upv[t1[:, 0] == 0, :] = 0.0
 
-            #Gradient wrt S
-            Slq = S[d]*lq #For grad wrt S
-            c0_S = Slq*np.sqrt(np.pi)/w2
-            K01 = c0_S*c
+            # Gradient wrt S
+            Slq = S[d] * lq  # For grad wrt S
+            c0_S = Slq * np.sqrt(np.pi) / w2
+            K01 = c0_S * c
 
-            gS[ind3t] = np.real(K01[ind]*upm) + np.real((c0_S[ind]*ec)*upv)
+            gS[ind3t] = np.real(K01[ind] * upm) + np.real((c0_S[ind] * ec) * upv)
 
-            #For B and C
-            upmd = etlq2gamt - 1.
+            # For B and C
+            upmd = etlq2gamt - 1.0
             upvd = egamt - et2_lq2
 
             # gradient wrt B
-            dw_dB = 0.5/w
-            dgam_dB = 1j*dw_dB
+            dw_dB = 0.5 / w
+            dgam_dB = 1j * dw_dB
 
-            Ba1 = c0*(0.5*dgam_dB/gam2 + (0.5*lq2*gam*dgam_dB - 2.*dw_dB/w)*c)
-            Ba2_1 = c0*(dgam_dB*(0.5/gam2 - 0.25*lq2) + dw_dB/(w*gam))
-            Ba2_2 = c0*dgam_dB/gam
-            Ba3 = c0*(-0.25*lq2*gam*dgam_dB/alphad + dw_dB/(w*alphad))
-            Ba4_1 = (S2lq*lq)*dgam_dB/w2
-            Ba4 = Ba4_1*c
+            Ba1 = c0 * (
+                0.5 * dgam_dB / gam2 + (0.5 * lq2 * gam * dgam_dB - 2.0 * dw_dB / w) * c
+            )
+            Ba2_1 = c0 * (dgam_dB * (0.5 / gam2 - 0.25 * lq2) + dw_dB / (w * gam))
+            Ba2_2 = c0 * dgam_dB / gam
+            Ba3 = c0 * (-0.25 * lq2 * gam * dgam_dB / alphad + dw_dB / (w * alphad))
+            Ba4_1 = (S2lq * lq) * dgam_dB / w2
+            Ba4 = Ba4_1 * c
 
-            gB[ind3t] = np.real(Ba1[ind]*upm) - np.real(((Ba2_1[ind] + Ba2_2[ind]*t1)*egamt - Ba3[ind]*egamct)*upv)\
-                + np.real(Ba4[ind]*upmd) + np.real((Ba4_1[ind]*ec)*upvd)
+            gB[ind3t] = (
+                np.real(Ba1[ind] * upm)
+                - np.real(
+                    ((Ba2_1[ind] + Ba2_2[ind] * t1) * egamt - Ba3[ind] * egamct) * upv
+                )
+                + np.real(Ba4[ind] * upmd)
+                + np.real((Ba4_1[ind] * ec) * upvd)
+            )
 
             # gradient wrt C
-            dw_dC = - alphad*dw_dB
-            dgam_dC = 0.5 + 1j*dw_dC
+            dw_dC = -alphad * dw_dB
+            dgam_dC = 0.5 + 1j * dw_dC
 
-            Ca1 = c0*(-0.25/alpha2 + 0.5*dgam_dC/gam2 + (0.5*lq2*gam*dgam_dC - 2.*dw_dC/w)*c)
-            Ca2_1 = c0*(dgam_dC*(0.5/gam2 - 0.25*lq2) + dw_dC/(w*gam))
-            Ca2_2 = c0*dgam_dC/gam
-            Ca3_1 = c0*(0.25/alpha2 - 0.25*lq2*gam*dgam_dC/alphad + dw_dC/(w*alphad))
-            Ca3_2 = 0.5*c0/alphad
-            Ca4_1 = (S2lq*lq)*dgam_dC/w2
-            Ca4 = Ca4_1*c
+            Ca1 = c0 * (
+                -0.25 / alpha2
+                + 0.5 * dgam_dC / gam2
+                + (0.5 * lq2 * gam * dgam_dC - 2.0 * dw_dC / w) * c
+            )
+            Ca2_1 = c0 * (dgam_dC * (0.5 / gam2 - 0.25 * lq2) + dw_dC / (w * gam))
+            Ca2_2 = c0 * dgam_dC / gam
+            Ca3_1 = c0 * (
+                0.25 / alpha2
+                - 0.25 * lq2 * gam * dgam_dC / alphad
+                + dw_dC / (w * alphad)
+            )
+            Ca3_2 = 0.5 * c0 / alphad
+            Ca4_1 = (S2lq * lq) * dgam_dC / w2
+            Ca4 = Ca4_1 * c
 
-            gC[ind3t] = np.real(Ca1[ind]*upm) - np.real(((Ca2_1[ind] + Ca2_2[ind]*t1)*egamt - (Ca3_1[ind] + Ca3_2[ind]*t1)*egamct)*upv)\
-                + np.real(Ca4[ind]*upmd) + np.real((Ca4_1[ind]*ec)*upvd)
+            gC[ind3t] = (
+                np.real(Ca1[ind] * upm)
+                - np.real(
+                    (
+                        (Ca2_1[ind] + Ca2_2[ind] * t1) * egamt
+                        - (Ca3_1[ind] + Ca3_2[ind] * t1) * egamct
+                    )
+                    * upv
+                )
+                + np.real(Ca4[ind] * upmd)
+                + np.real((Ca4_1[ind] * ec) * upvd)
+            )
 
-            #Gradient wrt lengthscale
-            #DxQ terms
-            la = (1./lq + nu*gam)*c0
-            la1 = la*c
+            # Gradient wrt lengthscale
+            # DxQ terms
+            la = (1.0 / lq + nu * gam) * c0
+            la1 = la * c
 
-            c0l = (S2[d]/w2)*lq
-            la3 = c0l*c
-            gam_2 = .5*gam
-            glq[ind3t] = (la1[ind]*upm).real + ((la[ind]*ec)*upv).real\
-                + (la3[ind]*(-gam_2[ind] + etlq2gamt*(-t_lq2 + gam_2[ind]))).real\
-                + ((c0l[ind]*ec)*(-et2_lq2*(t_lq2 + gam_2[ind]) + egamt*gam_2[ind])).real
+            c0l = (S2[d] / w2) * lq
+            la3 = c0l * c
+            gam_2 = 0.5 * gam
+            glq[ind3t] = (
+                (la1[ind] * upm).real
+                + ((la[ind] * ec) * upv).real
+                + (la3[ind] * (-gam_2[ind] + etlq2gamt * (-t_lq2 + gam_2[ind]))).real
+                + (
+                    (c0l[ind] * ec)
+                    * (-et2_lq2 * (t_lq2 + gam_2[ind]) + egamt * gam_2[ind])
+                ).real
+            )
 
-        #(2) When w_d is complex
+        # (2) When w_d is complex
         if np.any(wbool):
             t1 = t[ind2t]
             ind = index[ind2t]
-            #Index transformation
+            # Index transformation
             d = np.asarray(np.where(wbool)[0])
             indd = indD.copy()
             indd[d] = np.arange(d.size)
             ind = indd[ind]
-            #Dx1 terms
-            S2lq = S2[d]*(.25*lq)
-            c0 = S2lq*np.sqrt(np.pi)
-            w = .5*np.sqrt(C2[d]-4.*B[d])
-            w2 = -w*w
+            # Dx1 terms
+            S2lq = S2[d] * (0.25 * lq)
+            c0 = S2lq * np.sqrt(np.pi)
+            w = 0.5 * np.sqrt(C2[d] - 4.0 * B[d])
+            w2 = -w * w
             alphad = alpha[d]
-            alpha2 = alphad*alphad
+            alpha2 = alphad * alphad
             gam = alphad - w
             gamc = alphad + w
-            gam2 = gam*gam
-            gamc2 = gamc*gamc
-            c1 = .5/alphad
-            c21 = .5/gam
-            c22 = .5/gamc
+            gam2 = gam * gam
+            gamc2 = gamc * gamc
+            c1 = 0.5 / alphad
+            c21 = 0.5 / gam
+            c22 = 0.5 / gamc
             c = c1 - c21
             c2 = c1 - c22
-            #DxQ terms
-            c0 = c0/w2
-            nu = .5*lq*gam
-            nuc = .5*lq*gamc
+            # DxQ terms
+            c0 = c0 / w2
+            nu = 0.5 * lq * gam
+            nuc = 0.5 * lq * gamc
 
-            #Nx1 terms
-            gamt = -gam[ind]*t1
-            gamct = -gamc[ind]*t1
+            # Nx1 terms
+            gamt = -gam[ind] * t1
+            gamct = -gamc[ind] * t1
             egamt = np.exp(gamt)
             egamct = np.exp(gamct)
-            ec = egamt*c21[ind] - egamct*c1[ind]
-            ec2 = egamct*c22[ind] - egamt*c1[ind]
-            #NxQ terms
-            t_lq = t1/lq
-            t2_lq2 = -t_lq*t_lq
+            ec = egamt * c21[ind] - egamct * c1[ind]
+            ec2 = egamct * c22[ind] - egamt * c1[ind]
+            # NxQ terms
+            t_lq = t1 / lq
+            t2_lq2 = -t_lq * t_lq
 
             et2_lq2 = np.exp(t2_lq2)
             etlq2gamct = np.exp(t2_lq2 + gamct)
             etlq2gamt = np.exp(t2_lq2 + gamt)
 
-            #Upsilon Calculations using wofz
-            t2_lq2 = -t_lq*t_lq #Required when using wofz
-            wnu = np.real(wofz(1j*nu))
+            # Upsilon Calculations using wofz
+            t2_lq2 = -t_lq * t_lq  # Required when using wofz
+            wnu = np.real(wofz(1j * nu))
             lwnu = np.log(wnu)
 
-            upm = wnu[ind] - np.exp(t2_lq2 + gamt + np.log(wofz(1j*(t_lq + nu[ind])).real))
-            upm[t1[:, 0] == 0., :] = 0.
+            upm = wnu[ind] - np.exp(
+                t2_lq2 + gamt + np.log(wofz(1j * (t_lq + nu[ind])).real)
+            )
+            upm[t1[:, 0] == 0.0, :] = 0.0
 
-            nu2 = nu*nu
+            nu2 = nu * nu
             z1 = nu[ind] - t_lq
-            indv1 = np.where(z1 >= 0.)
-            indv2 = np.where(z1 < 0.)
+            indv1 = np.where(z1 >= 0.0)
+            indv2 = np.where(z1 < 0.0)
             upv = -np.exp(lwnu[ind] + gamt)
             if indv1[0].shape > 0:
-                upv[indv1] += np.exp(t2_lq2[indv1] + np.log(wofz(1j*z1[indv1]).real))
+                upv[indv1] += np.exp(t2_lq2[indv1] + np.log(wofz(1j * z1[indv1]).real))
             if indv2[0].shape > 0:
-                upv[indv2] += np.exp(nu2[ind[indv2[0]], indv2[1]] + gamt[indv2[0], 0] + np.log(2.)) - np.exp(t2_lq2[indv2]\
-                    + np.log(wofz(-1j*z1[indv2]).real))
-            upv[t1[:, 0] == 0, :] = 0.
+                upv[indv2] += np.exp(
+                    nu2[ind[indv2[0]], indv2[1]] + gamt[indv2[0], 0] + np.log(2.0)
+                ) - np.exp(t2_lq2[indv2] + np.log(wofz(-1j * z1[indv2]).real))
+            upv[t1[:, 0] == 0, :] = 0.0
 
-            wnuc = wofz(1j*nuc).real
-            upmc = wnuc[ind] - np.exp(t2_lq2 + gamct + np.log(wofz(1j*(t_lq + nuc[ind])).real))
-            upmc[t1[:, 0] == 0., :] = 0.
+            wnuc = wofz(1j * nuc).real
+            upmc = wnuc[ind] - np.exp(
+                t2_lq2 + gamct + np.log(wofz(1j * (t_lq + nuc[ind])).real)
+            )
+            upmc[t1[:, 0] == 0.0, :] = 0.0
 
             lwnuc = np.log(wnuc)
-            nuc2 = nuc*nuc
+            nuc2 = nuc * nuc
             z1 = nuc[ind] - t_lq
-            indv1 = np.where(z1 >= 0.)
-            indv2 = np.where(z1 < 0.)
+            indv1 = np.where(z1 >= 0.0)
+            indv2 = np.where(z1 < 0.0)
             upvc = -np.exp(lwnuc[ind] + gamct)
             if indv1[0].shape > 0:
-                upvc[indv1] += np.exp(t2_lq2[indv1] + np.log(wofz(1j*z1[indv1]).real))
+                upvc[indv1] += np.exp(t2_lq2[indv1] + np.log(wofz(1j * z1[indv1]).real))
             if indv2[0].shape > 0:
-                upvc[indv2] += np.exp(nuc2[ind[indv2[0]], indv2[1]] + gamct[indv2[0], 0] + np.log(2.)) - np.exp(t2_lq2[indv2]\
-                    + np.log(wofz(-1j*z1[indv2]).real))
-            upvc[t1[:, 0] == 0, :] = 0.
+                upvc[indv2] += np.exp(
+                    nuc2[ind[indv2[0]], indv2[1]] + gamct[indv2[0], 0] + np.log(2.0)
+                ) - np.exp(t2_lq2[indv2] + np.log(wofz(-1j * z1[indv2]).real))
+            upvc[t1[:, 0] == 0, :] = 0.0
 
-            #Gradient wrt S
-            #NxQ terms
-            c0_S = (S[d]/w2)*(lq*(np.sqrt(np.pi)*.5))
+            # Gradient wrt S
+            # NxQ terms
+            c0_S = (S[d] / w2) * (lq * (np.sqrt(np.pi) * 0.5))
 
-            K011 = c0_S*c
-            K012 = c0_S*c2
+            K011 = c0_S * c
+            K012 = c0_S * c2
 
-            gS[ind2t] = K011[ind]*upm + K012[ind]*upmc + (c0_S[ind]*ec)*upv + (c0_S[ind]*ec2)*upvc
+            gS[ind2t] = (
+                K011[ind] * upm
+                + K012[ind] * upmc
+                + (c0_S[ind] * ec) * upv
+                + (c0_S[ind] * ec2) * upvc
+            )
 
-            #Is required to cache this, C gradient also required them
-            upmd = -1. + etlq2gamt
+            # Cache these terms: the gradients wrt B and C below both use them
+            upmd = -1.0 + etlq2gamt
             upvd = -et2_lq2 + egamt
-            upmdc = -1. + etlq2gamct
+            upmdc = -1.0 + etlq2gamct
             upvdc = -et2_lq2 + egamct
 
             # Gradient wrt B
-            dgam_dB = 0.5/w
+            dgam_dB = 0.5 / w
             dgamc_dB = -dgam_dB
 
-            Ba1 = c0*(0.5*dgam_dB/gam2 + (0.5*lq2*gam*dgam_dB - 1./w2)*c)
-            Ba3 = c0*(-0.25*lq2*gam*dgam_dB/alphad + 0.5/(w2*alphad))
-            Ba4_1 = (S2lq*lq)*dgam_dB/w2
-            Ba4 = Ba4_1*c
-            Ba2_1 = c0*(dgam_dB*(0.5/gam2 - 0.25*lq2) + 0.5/(w2*gam))
-            Ba2_2 = c0*dgam_dB/gam
+            Ba1 = c0 * (
+                0.5 * dgam_dB / gam2 + (0.5 * lq2 * gam * dgam_dB - 1.0 / w2) * c
+            )
+            Ba3 = c0 * (-0.25 * lq2 * gam * dgam_dB / alphad + 0.5 / (w2 * alphad))
+            Ba4_1 = (S2lq * lq) * dgam_dB / w2
+            Ba4 = Ba4_1 * c
+            Ba2_1 = c0 * (dgam_dB * (0.5 / gam2 - 0.25 * lq2) + 0.5 / (w2 * gam))
+            Ba2_2 = c0 * dgam_dB / gam
 
-            Ba1c = c0*(0.5*dgamc_dB/gamc2 + (0.5*lq2*gamc*dgamc_dB - 1./w2)*c2)
-            Ba3c = c0*(-0.25*lq2*gamc*dgamc_dB/alphad + 0.5/(w2*alphad))
-            Ba4_1c = (S2lq*lq)*dgamc_dB/w2
-            Ba4c = Ba4_1c*c2
-            Ba2_1c = c0*(dgamc_dB*(0.5/gamc2 - 0.25*lq2) + 0.5/(w2*gamc))
-            Ba2_2c = c0*dgamc_dB/gamc
+            Ba1c = c0 * (
+                0.5 * dgamc_dB / gamc2 + (0.5 * lq2 * gamc * dgamc_dB - 1.0 / w2) * c2
+            )
+            Ba3c = c0 * (-0.25 * lq2 * gamc * dgamc_dB / alphad + 0.5 / (w2 * alphad))
+            Ba4_1c = (S2lq * lq) * dgamc_dB / w2
+            Ba4c = Ba4_1c * c2
+            Ba2_1c = c0 * (dgamc_dB * (0.5 / gamc2 - 0.25 * lq2) + 0.5 / (w2 * gamc))
+            Ba2_2c = c0 * dgamc_dB / gamc
 
-            gB[ind2t] = Ba1[ind]*upm - ((Ba2_1[ind] + Ba2_2[ind]*t1)*egamt - Ba3[ind]*egamct)*upv\
-                + Ba4[ind]*upmd + (Ba4_1[ind]*ec)*upvd\
-                + Ba1c[ind]*upmc - ((Ba2_1c[ind] + Ba2_2c[ind]*t1)*egamct - Ba3c[ind]*egamt)*upvc\
-                + Ba4c[ind]*upmdc + (Ba4_1c[ind]*ec2)*upvdc
+            gB[ind2t] = (
+                Ba1[ind] * upm
+                - ((Ba2_1[ind] + Ba2_2[ind] * t1) * egamt - Ba3[ind] * egamct) * upv
+                + Ba4[ind] * upmd
+                + (Ba4_1[ind] * ec) * upvd
+                + Ba1c[ind] * upmc
+                - ((Ba2_1c[ind] + Ba2_2c[ind] * t1) * egamct - Ba3c[ind] * egamt) * upvc
+                + Ba4c[ind] * upmdc
+                + (Ba4_1c[ind] * ec2) * upvdc
+            )
 
             ##Gradient wrt C
-            dw_dC = 0.5*alphad/w
+            dw_dC = 0.5 * alphad / w
             dgam_dC = 0.5 - dw_dC
             dgamc_dC = 0.5 + dw_dC
-            S2lq2 = S2lq*lq
+            S2lq2 = S2lq * lq
 
-            Ca1 = c0*(-0.25/alpha2 + 0.5*dgam_dC/gam2 + (0.5*lq2*gam*dgam_dC + alphad/w2)*c)
-            Ca2_1 = c0*(dgam_dC*(0.5/gam2 - 0.25*lq2) - 0.5*alphad/(w2*gam))
-            Ca2_2 = c0*dgam_dC/gam
-            Ca3_1 = c0*(0.25/alpha2 - 0.25*lq2*gam*dgam_dC/alphad - 0.5/w2)
-            Ca3_2 = 0.5*c0/alphad
-            Ca4_1 = S2lq2*(dgam_dC/w2)
-            Ca4 = Ca4_1*c
+            Ca1 = c0 * (
+                -0.25 / alpha2
+                + 0.5 * dgam_dC / gam2
+                + (0.5 * lq2 * gam * dgam_dC + alphad / w2) * c
+            )
+            Ca2_1 = c0 * (
+                dgam_dC * (0.5 / gam2 - 0.25 * lq2) - 0.5 * alphad / (w2 * gam)
+            )
+            Ca2_2 = c0 * dgam_dC / gam
+            Ca3_1 = c0 * (
+                0.25 / alpha2 - 0.25 * lq2 * gam * dgam_dC / alphad - 0.5 / w2
+            )
+            Ca3_2 = 0.5 * c0 / alphad
+            Ca4_1 = S2lq2 * (dgam_dC / w2)
+            Ca4 = Ca4_1 * c
 
-            Ca1c = c0*(-0.25/alpha2 + 0.5*dgamc_dC/gamc2 + (0.5*lq2*gamc*dgamc_dC + alphad/w2)*c2)
-            Ca2_1c = c0*(dgamc_dC*(0.5/gamc2 - 0.25*lq2) - 0.5*alphad/(w2*gamc))
-            Ca2_2c = c0*dgamc_dC/gamc
-            Ca3_1c = c0*(0.25/alpha2 - 0.25*lq2*gamc*dgamc_dC/alphad - 0.5/w2)
-            Ca3_2c = 0.5*c0/alphad
-            Ca4_1c = S2lq2*(dgamc_dC/w2)
-            Ca4c = Ca4_1c*c2
+            Ca1c = c0 * (
+                -0.25 / alpha2
+                + 0.5 * dgamc_dC / gamc2
+                + (0.5 * lq2 * gamc * dgamc_dC + alphad / w2) * c2
+            )
+            Ca2_1c = c0 * (
+                dgamc_dC * (0.5 / gamc2 - 0.25 * lq2) - 0.5 * alphad / (w2 * gamc)
+            )
+            Ca2_2c = c0 * dgamc_dC / gamc
+            Ca3_1c = c0 * (
+                0.25 / alpha2 - 0.25 * lq2 * gamc * dgamc_dC / alphad - 0.5 / w2
+            )
+            Ca3_2c = 0.5 * c0 / alphad
+            Ca4_1c = S2lq2 * (dgamc_dC / w2)
+            Ca4c = Ca4_1c * c2
 
-            gC[ind2t] = Ca1[ind]*upm - ((Ca2_1[ind] + Ca2_2[ind]*t1)*egamt - (Ca3_1[ind] + Ca3_2[ind]*t1)*egamct)*upv\
-                + Ca4[ind]*upmd + (Ca4_1[ind]*ec)*upvd\
-                + Ca1c[ind]*upmc - ((Ca2_1c[ind] + Ca2_2c[ind]*t1)*egamct - (Ca3_1c[ind] + Ca3_2c[ind]*t1)*egamt)*upvc\
-                + Ca4c[ind]*upmdc + (Ca4_1c[ind]*ec2)*upvdc
+            gC[ind2t] = (
+                Ca1[ind] * upm
+                - (
+                    (Ca2_1[ind] + Ca2_2[ind] * t1) * egamt
+                    - (Ca3_1[ind] + Ca3_2[ind] * t1) * egamct
+                )
+                * upv
+                + Ca4[ind] * upmd
+                + (Ca4_1[ind] * ec) * upvd
+                + Ca1c[ind] * upmc
+                - (
+                    (Ca2_1c[ind] + Ca2_2c[ind] * t1) * egamct
+                    - (Ca3_1c[ind] + Ca3_2c[ind] * t1) * egamt
+                )
+                * upvc
+                + Ca4c[ind] * upmdc
+                + (Ca4_1c[ind] * ec2) * upvdc
+            )
 
-            #Gradient wrt lengthscale
-            #DxQ terms
-            la = (1./lq + nu*gam)*c0
-            lac = (1./lq + nuc*gamc)*c0
-            la1 = la*c
-            la1c = lac*c2
-            t_lq2 = t_lq/lq
-            c0l = (S2[d]/w2)*(.5*lq)
-            la3 = c0l*c
-            la3c = c0l*c2
-            gam_2 = .5*gam
-            gamc_2 = .5*gamc
-            glq[ind2t] = la1c[ind]*upmc + (lac[ind]*ec2)*upvc\
-                + la3c[ind]*(-gamc_2[ind] + etlq2gamct*(-t_lq2 + gamc_2[ind]))\
-                + (c0l[ind]*ec2)*(-et2_lq2*(t_lq2 + gamc_2[ind]) + egamct*gamc_2[ind])\
-                + la1[ind]*upm + (la[ind]*ec)*upv\
-                + la3[ind]*(-gam_2[ind] + etlq2gamt*(-t_lq2 + gam_2[ind]))\
-                + (c0l[ind]*ec)*(-et2_lq2*(t_lq2 + gam_2[ind]) + egamt*gam_2[ind])
+            # Gradient wrt lengthscale
+            # DxQ terms
+            la = (1.0 / lq + nu * gam) * c0
+            lac = (1.0 / lq + nuc * gamc) * c0
+            la1 = la * c
+            la1c = lac * c2
+            t_lq2 = t_lq / lq
+            c0l = (S2[d] / w2) * (0.5 * lq)
+            la3 = c0l * c
+            la3c = c0l * c2
+            gam_2 = 0.5 * gam
+            gamc_2 = 0.5 * gamc
+            glq[ind2t] = (
+                la1c[ind] * upmc
+                + (lac[ind] * ec2) * upvc
+                + la3c[ind] * (-gamc_2[ind] + etlq2gamct * (-t_lq2 + gamc_2[ind]))
+                + (c0l[ind] * ec2)
+                * (-et2_lq2 * (t_lq2 + gamc_2[ind]) + egamct * gamc_2[ind])
+                + la1[ind] * upm
+                + (la[ind] * ec) * upv
+                + la3[ind] * (-gam_2[ind] + etlq2gamt * (-t_lq2 + gam_2[ind]))
+                + (c0l[ind] * ec)
+                * (-et2_lq2 * (t_lq2 + gam_2[ind]) + egamt * gam_2[ind])
+            )
 
         return glq, gS, gB, gC
 
     def _gkfu(self, X, index, Z, index2):
-        index = index.reshape(index.size,)
-        #TODO: reduce memory usage
-        #terms that move along t
+        index = index.reshape(
+            index.size,
+        )
+        # TODO: reduce memory usage
+        # terms that move along t
         d = np.unique(index)
         B = self.B[d].values
         C = self.C[d].values
         S = self.W[d, :].values
-        #Index transformation
+        # Index transformation
         indd = np.arange(self.output_dim)
         indd[d] = np.arange(d.size)
         index = indd[index]
-        #Check where wd becomes complex
-        wbool = C*C >= 4.*B
-        #t column
+        # Check where wd becomes complex
+        wbool = C * C >= 4.0 * B
+        # t column
         t = X[:, 0].reshape(X.shape[0], 1)
         C = C.reshape(C.size, 1)
         B = B.reshape(B.size, 1)
-        C2 = C*C
-        #z row
+        C2 = C * C
+        # z row
         z = Z[:, 0].reshape(1, Z.shape[0])
-        index2 = index2.reshape(index2.size,)
+        index2 = index2.reshape(
+            index2.size,
+        )
         lq = self.lengthscale.values.reshape((1, self.rank))
-        lq2 = lq*lq
+        lq2 = lq * lq
 
-        alpha = .5*C
+        alpha = 0.5 * C
 
         wbool2 = wbool[index]
         ind2t = np.where(wbool2)
         ind3t = np.where(np.logical_not(wbool2))
-        #kfu = np.empty((t.size, z.size))
+        # kfu = np.empty((t.size, z.size))
         glq = np.empty((t.size, z.size))
         gSdq = np.empty((t.size, z.size))
         gB = np.empty((t.size, z.size))
         gC = np.empty((t.size, z.size))
 
         indD = np.arange(B.size)
-        #(1) when wd is real
+        # (1) when wd is real
         if np.any(np.logical_not(wbool)):
-            #Indexes of index and t related to (2)
+            # Indexes of index and t related to (1)
             t1 = t[ind3t]
             ind = index[ind3t]
-            #Index transformation
+            # Index transformation
             d = np.asarray(np.where(np.logical_not(wbool))[0])
             indd = indD.copy()
             indd[d] = np.arange(d.size)
             ind = indd[ind]
-            #Dx1 terms
-            w = .5*np.sqrt(4.*B[d] - C2[d])
+            # Dx1 terms
+            w = 0.5 * np.sqrt(4.0 * B[d] - C2[d])
             alphad = alpha[d]
-            gam = alphad - 1j*w
-            gam_2 = .5*gam
-            S_w = S[d]/w
-            S_wpi = S_w*(.5*np.sqrt(np.pi))
-            #DxQ terms
-            c0 = S_wpi*lq #lq*Sdq*sqrt(pi)/(2w)
-            nu = gam*lq
-            nu2 = 1.+.5*(nu*nu)
-            nu *= .5
+            gam = alphad - 1j * w
+            gam_2 = 0.5 * gam
+            S_w = S[d] / w
+            S_wpi = S_w * (0.5 * np.sqrt(np.pi))
+            # DxQ terms
+            c0 = S_wpi * lq  # lq*Sdq*sqrt(pi)/(2w)
+            nu = gam * lq
+            nu2 = 1.0 + 0.5 * (nu * nu)
+            nu *= 0.5
 
-            #1xM terms
-            z_lq = z/lq[0, index2]
-            z_lq2 = -z_lq*z_lq
-            #NxQ terms
-            t_lq = t1/lq
-            #DxM terms
-            gamt = -gam[ind]*t1
-            #NxM terms
+            # 1xM terms
+            z_lq = z / lq[0, index2]
+            z_lq2 = -z_lq * z_lq
+            # NxQ terms
+            t_lq = t1 / lq
+            # Nx1 terms
+            gamt = -gam[ind] * t1
+            # NxM terms
             zt_lq = z_lq - t_lq[:, index2]
-            zt_lq2 = -zt_lq*zt_lq
+            zt_lq2 = -zt_lq * zt_lq
             ezt_lq2 = -np.exp(zt_lq2)
             ezgamt = np.exp(z_lq2 + gamt)
 
             # Upsilon calculations
             fullind = np.ix_(ind, index2)
-            upsi = - np.exp(z_lq2 + gamt + np.log(wofz(1j*(z_lq + nu[fullind]))))
-            tz = t1-z
+            upsi = -np.exp(z_lq2 + gamt + np.log(wofz(1j * (z_lq + nu[fullind]))))
+            tz = t1 - z
             z1 = zt_lq + nu[fullind]
-            indv1 = np.where(z1.real >= 0.)
-            indv2 = np.where(z1.real < 0.)
+            indv1 = np.where(z1.real >= 0.0)
+            indv2 = np.where(z1.real < 0.0)
             if indv1[0].shape > 0:
-                upsi[indv1] += np.exp(zt_lq2[indv1] + np.log(wofz(1j*z1[indv1])))
+                upsi[indv1] += np.exp(zt_lq2[indv1] + np.log(wofz(1j * z1[indv1])))
             if indv2[0].shape > 0:
-                nua2 = nu[ind[indv2[0]], index2[indv2[1]]]**2
-                upsi[indv2] += np.exp(nua2 - gam[ind[indv2[0]], 0]*tz[indv2] + np.log(2.))\
-                               - np.exp(zt_lq2[indv2] + np.log(wofz(-1j*z1[indv2])))
-            upsi[t1[:, 0] == 0., :] = 0.
+                nua2 = nu[ind[indv2[0]], index2[indv2[1]]] ** 2
+                upsi[indv2] += np.exp(
+                    nua2 - gam[ind[indv2[0]], 0] * tz[indv2] + np.log(2.0)
+                ) - np.exp(zt_lq2[indv2] + np.log(wofz(-1j * z1[indv2])))
+            upsi[t1[:, 0] == 0.0, :] = 0.0
 
-            #Gradient wrt S
-            #DxQ term
-            Sa1 = lq*(.5*np.sqrt(np.pi))/w
+            # Gradient wrt S
+            # DxQ term
+            Sa1 = lq * (0.5 * np.sqrt(np.pi)) / w
 
-            gSdq[ind3t] = Sa1[np.ix_(ind, index2)]*upsi.imag
+            gSdq[ind3t] = Sa1[np.ix_(ind, index2)] * upsi.imag
 
-            #Gradient wrt lq
-            la1 = S_wpi*nu2
-            la2 = S_w*lq
-            uplq = ezt_lq2*(gam_2[ind])
-            uplq += ezgamt*(-z_lq/lq[0, index2] + gam_2[ind])
+            # Gradient wrt lq
+            la1 = S_wpi * nu2
+            la2 = S_w * lq
+            uplq = ezt_lq2 * (gam_2[ind])
+            uplq += ezgamt * (-z_lq / lq[0, index2] + gam_2[ind])
 
-            glq[ind3t] = (la1[np.ix_(ind, index2)]*upsi).imag
-            glq[ind3t] += la2[np.ix_(ind, index2)]*uplq.imag
+            glq[ind3t] = (la1[np.ix_(ind, index2)] * upsi).imag
+            glq[ind3t] += la2[np.ix_(ind, index2)] * uplq.imag
 
-            #Gradient wrt B
-            #Dx1 terms
-            dw_dB = .5/w
-            dgam_dB = -1j*dw_dB
-            #DxQ terms
-            Ba1 = -c0*dw_dB/w #DXQ
-            Ba2 = c0*dgam_dB #DxQ
-            Ba3 = lq2*gam_2 #DxQ
-            Ba4 = (dgam_dB*S_w)*(.5*lq2) #DxQ
+            # Gradient wrt B
+            # Dx1 terms
+            dw_dB = 0.5 / w
+            dgam_dB = -1j * dw_dB
+            # DxQ terms
+            Ba1 = -c0 * dw_dB / w  # DXQ
+            Ba2 = c0 * dgam_dB  # DxQ
+            Ba3 = lq2 * gam_2  # DxQ
+            Ba4 = (dgam_dB * S_w) * (0.5 * lq2)  # DxQ
 
-            gB[ind3t] = ((Ba1[np.ix_(ind, index2)] + Ba2[np.ix_(ind, index2)]*(Ba3[np.ix_(ind, index2)] - (t1-z)))*upsi).imag\
-                + (Ba4[np.ix_(ind, index2)]*(ezt_lq2 + ezgamt)).imag
+            gB[ind3t] = (
+                (
+                    Ba1[np.ix_(ind, index2)]
+                    + Ba2[np.ix_(ind, index2)] * (Ba3[np.ix_(ind, index2)] - (t1 - z))
+                )
+                * upsi
+            ).imag + (Ba4[np.ix_(ind, index2)] * (ezt_lq2 + ezgamt)).imag
 
-            #Gradient wrt C (it uses some calculations performed in B)
-            #Dx1 terms
-            dw_dC = -.5*alphad/w
-            dgam_dC = 0.5 - 1j*dw_dC
-            #DxQ terms
-            Ca1 = -c0*dw_dC/w #DXQ
-            Ca2 = c0*dgam_dC #DxQ
-            Ca4 = (dgam_dC*S_w)*(.5*lq2) #DxQ
+            # Gradient wrt C (it uses some calculations performed in B)
+            # Dx1 terms
+            dw_dC = -0.5 * alphad / w
+            dgam_dC = 0.5 - 1j * dw_dC
+            # DxQ terms
+            Ca1 = -c0 * dw_dC / w  # DXQ
+            Ca2 = c0 * dgam_dC  # DxQ
+            Ca4 = (dgam_dC * S_w) * (0.5 * lq2)  # DxQ
 
-            gC[ind3t] = ((Ca1[np.ix_(ind, index2)] + Ca2[np.ix_(ind, index2)]*(Ba3[np.ix_(ind, index2)] - (t1-z)))*upsi).imag\
-                + (Ca4[np.ix_(ind, index2)]*(ezt_lq2 + ezgamt)).imag
+            gC[ind3t] = (
+                (
+                    Ca1[np.ix_(ind, index2)]
+                    + Ca2[np.ix_(ind, index2)] * (Ba3[np.ix_(ind, index2)] - (t1 - z))
+                )
+                * upsi
+            ).imag + (Ca4[np.ix_(ind, index2)] * (ezt_lq2 + ezgamt)).imag
 
-        #(2) when wd is complex
+        # (2) when wd is complex
         if np.any(wbool):
-            #Indexes of index and t related to (2)
+            # Indexes of index and t related to (2)
             t1 = t[ind2t]
             ind = index[ind2t]
-            #Index transformation
+            # Index transformation
             d = np.asarray(np.where(wbool)[0])
             indd = indD.copy()
             indd[d] = np.arange(d.size)
             ind = indd[ind]
-            #Dx1 terms
-            w = .5*np.sqrt(C2[d] - 4.*B[d])
-            w2 = w*w
+            # Dx1 terms
+            w = 0.5 * np.sqrt(C2[d] - 4.0 * B[d])
+            w2 = w * w
             alphad = alpha[d]
             gam = alphad - w
             gamc = alphad + w
-            #DxQ terms
-            S_w= -S[d]/w #minus is given by j*j
-            S_wpi = S_w*(.25*np.sqrt(np.pi))
+            # DxQ terms
+            S_w = -S[d] / w  # minus is given by j*j
+            S_wpi = S_w * (0.25 * np.sqrt(np.pi))
 
-            c0 = S_wpi*lq
-            gam_2 = .5*gam
-            gamc_2 = .5*gamc
-            nu = gam*lq
-            nuc = gamc*lq
-            nu2 = 1.+.5*(nu*nu)
-            nuc2 = 1.+.5*(nuc*nuc)
-            nu *= .5
-            nuc *= .5
-            #1xM terms
-            z_lq = z/lq[0, index2]
-            z_lq2 = -z_lq*z_lq
-            #Nx1
-            gamt = -gam[ind]*t1
-            gamct = -gamc[ind]*t1
-            #NxQ terms
-            t_lq = t1/lq[0, index2]
-            #NxM terms
+            c0 = S_wpi * lq
+            gam_2 = 0.5 * gam
+            gamc_2 = 0.5 * gamc
+            nu = gam * lq
+            nuc = gamc * lq
+            nu2 = 1.0 + 0.5 * (nu * nu)
+            nuc2 = 1.0 + 0.5 * (nuc * nuc)
+            nu *= 0.5
+            nuc *= 0.5
+            # 1xM terms
+            z_lq = z / lq[0, index2]
+            z_lq2 = -z_lq * z_lq
+            # Nx1
+            gamt = -gam[ind] * t1
+            gamct = -gamc[ind] * t1
+            # NxQ terms
+            t_lq = t1 / lq[0, index2]
+            # NxM terms
             zt_lq = z_lq - t_lq
-            zt_lq2 = -zt_lq*zt_lq
+            zt_lq2 = -zt_lq * zt_lq
             ezt_lq2 = -np.exp(zt_lq2)
             ezgamt = np.exp(z_lq2 + gamt)
             ezgamct = np.exp(z_lq2 + gamct)
 
             # Upsilon calculations
             fullind = np.ix_(ind, index2)
-            upsi1 = - np.exp(z_lq2 + gamct + np.log(wofz(1j*(z_lq + nuc[fullind])).real))
-            tz = t1-z
+            upsi1 = -np.exp(
+                z_lq2 + gamct + np.log(wofz(1j * (z_lq + nuc[fullind])).real)
+            )
+            tz = t1 - z
             z1 = zt_lq + nuc[fullind]
-            indv1 = np.where(z1 >= 0.)
-            indv2 = np.where(z1 < 0.)
+            indv1 = np.where(z1 >= 0.0)
+            indv2 = np.where(z1 < 0.0)
             if indv1[0].shape > 0:
-                upsi1[indv1] += np.exp(zt_lq2[indv1] + np.log(wofz(1j*z1[indv1]).real))
+                upsi1[indv1] += np.exp(
+                    zt_lq2[indv1] + np.log(wofz(1j * z1[indv1]).real)
+                )
             if indv2[0].shape > 0:
-                nuac2 = nuc[ind[indv2[0]], index2[indv2[1]]]**2
-                upsi1[indv2] += np.exp(nuac2 - gamc[ind[indv2[0]], 0]*tz[indv2] + np.log(2.))\
-                               - np.exp(zt_lq2[indv2] + np.log(wofz(-1j*z1[indv2]).real))
-            upsi1[t1[:, 0] == 0., :] = 0.
+                nuac2 = nuc[ind[indv2[0]], index2[indv2[1]]] ** 2
+                upsi1[indv2] += np.exp(
+                    nuac2 - gamc[ind[indv2[0]], 0] * tz[indv2] + np.log(2.0)
+                ) - np.exp(zt_lq2[indv2] + np.log(wofz(-1j * z1[indv2]).real))
+            upsi1[t1[:, 0] == 0.0, :] = 0.0
 
-            upsi2 = - np.exp(z_lq2 + gamt + np.log(wofz(1j*(z_lq + nu[fullind])).real))
+            upsi2 = -np.exp(z_lq2 + gamt + np.log(wofz(1j * (z_lq + nu[fullind])).real))
             z1 = zt_lq + nu[fullind]
-            indv1 = np.where(z1 >= 0.)
-            indv2 = np.where(z1 < 0.)
+            indv1 = np.where(z1 >= 0.0)
+            indv2 = np.where(z1 < 0.0)
             if indv1[0].shape > 0:
-                upsi2[indv1] += np.exp(zt_lq2[indv1] + np.log(wofz(1j*z1[indv1]).real))
+                upsi2[indv1] += np.exp(
+                    zt_lq2[indv1] + np.log(wofz(1j * z1[indv1]).real)
+                )
             if indv2[0].shape > 0:
-                nua2 = nu[ind[indv2[0]], index2[indv2[1]]]**2
-                upsi2[indv2] += np.exp(nua2 - gam[ind[indv2[0]], 0]*tz[indv2] + np.log(2.))\
-                               - np.exp(zt_lq2[indv2] + np.log(wofz(-1j*z1[indv2]).real))
-            upsi2[t1[:, 0] == 0., :] = 0.
+                nua2 = nu[ind[indv2[0]], index2[indv2[1]]] ** 2
+                upsi2[indv2] += np.exp(
+                    nua2 - gam[ind[indv2[0]], 0] * tz[indv2] + np.log(2.0)
+                ) - np.exp(zt_lq2[indv2] + np.log(wofz(-1j * z1[indv2]).real))
+            upsi2[t1[:, 0] == 0.0, :] = 0.0
 
-            #Gradient wrt lq
-            la1 = S_wpi*nu2
-            la1c = S_wpi*nuc2
-            la2 = S_w*(.5*lq)
-            uplq = ezt_lq2*(gamc_2[ind]) + ezgamct*(-z_lq/lq[0, index2] + gamc_2[ind])\
-                - ezt_lq2*(gam_2[ind]) - ezgamt*(-z_lq/lq[0, index2] + gam_2[ind])
+            # Gradient wrt lq
+            la1 = S_wpi * nu2
+            la1c = S_wpi * nuc2
+            la2 = S_w * (0.5 * lq)
+            uplq = (
+                ezt_lq2 * (gamc_2[ind])
+                + ezgamct * (-z_lq / lq[0, index2] + gamc_2[ind])
+                - ezt_lq2 * (gam_2[ind])
+                - ezgamt * (-z_lq / lq[0, index2] + gam_2[ind])
+            )
 
-            glq[ind2t] = la1c[np.ix_(ind, index2)]*upsi1 - la1[np.ix_(ind, index2)]*upsi2\
-                + la2[np.ix_(ind, index2)]*uplq
+            glq[ind2t] = (
+                la1c[np.ix_(ind, index2)] * upsi1
+                - la1[np.ix_(ind, index2)] * upsi2
+                + la2[np.ix_(ind, index2)] * uplq
+            )
 
+            # Gradient wrt S
+            Sa1 = (lq * (-0.25 * np.sqrt(np.pi))) / w
 
-            #Gradient wrt S
-            Sa1 = (lq*(-.25*np.sqrt(np.pi)))/w
+            gSdq[ind2t] = Sa1[np.ix_(ind, index2)] * (upsi1 - upsi2)
 
-            gSdq[ind2t] = Sa1[np.ix_(ind, index2)]*(upsi1 - upsi2)
-
-            #Gradient wrt B
-            #Dx1 terms
-            dgam_dB = .5/w
+            # Gradient wrt B
+            # Dx1 terms
+            dgam_dB = 0.5 / w
             dgamc_dB = -dgam_dB
-            #DxQ terms
-            Ba1 = .5*(c0/w2)
-            Ba2 = c0*dgam_dB
-            Ba3 = lq2*gam_2
-            Ba4 = (dgam_dB*S_w)*(.25*lq2)
+            # DxQ terms
+            Ba1 = 0.5 * (c0 / w2)
+            Ba2 = c0 * dgam_dB
+            Ba3 = lq2 * gam_2
+            Ba4 = (dgam_dB * S_w) * (0.25 * lq2)
 
-            Ba2c = c0*dgamc_dB
-            Ba3c = lq2*gamc_2
-            Ba4c = (dgamc_dB*S_w)*(.25*lq2)
+            Ba2c = c0 * dgamc_dB
+            Ba3c = lq2 * gamc_2
+            Ba4c = (dgamc_dB * S_w) * (0.25 * lq2)
 
-            gB[ind2t] = (Ba1[np.ix_(ind, index2)] + Ba2c[np.ix_(ind, index2)]*(Ba3c[np.ix_(ind, index2)] - (t1-z)))*upsi1\
-                + Ba4c[np.ix_(ind, index2)]*(ezt_lq2 + ezgamct)\
-                - (Ba1[np.ix_(ind, index2)] + Ba2[np.ix_(ind, index2)]*(Ba3[np.ix_(ind, index2)] - (t1-z)))*upsi2\
-                - Ba4[np.ix_(ind, index2)]*(ezt_lq2 + ezgamt)
+            gB[ind2t] = (
+                (
+                    Ba1[np.ix_(ind, index2)]
+                    + Ba2c[np.ix_(ind, index2)] * (Ba3c[np.ix_(ind, index2)] - (t1 - z))
+                )
+                * upsi1
+                + Ba4c[np.ix_(ind, index2)] * (ezt_lq2 + ezgamct)
+                - (
+                    Ba1[np.ix_(ind, index2)]
+                    + Ba2[np.ix_(ind, index2)] * (Ba3[np.ix_(ind, index2)] - (t1 - z))
+                )
+                * upsi2
+                - Ba4[np.ix_(ind, index2)] * (ezt_lq2 + ezgamt)
+            )
 
-            #Gradient wrt C
-            #Dx1 terms
-            dgam_dC = 0.5 - .5*(alphad/w)
-            dgamc_dC = 0.5 + .5*(alphad/w)
-            #DxQ terms
-            Ca1 = -c0*(.5*alphad/w2)
-            Ca2 = c0*dgam_dC
-            Ca4 = (dgam_dC*S_w)*(.25*lq2)
+            # Gradient wrt C
+            # Dx1 terms
+            dgam_dC = 0.5 - 0.5 * (alphad / w)
+            dgamc_dC = 0.5 + 0.5 * (alphad / w)
+            # DxQ terms
+            Ca1 = -c0 * (0.5 * alphad / w2)
+            Ca2 = c0 * dgam_dC
+            Ca4 = (dgam_dC * S_w) * (0.25 * lq2)
 
-            Ca2c = c0*dgamc_dC
-            Ca4c = (dgamc_dC*S_w)*(.25*lq2)
+            Ca2c = c0 * dgamc_dC
+            Ca4c = (dgamc_dC * S_w) * (0.25 * lq2)
 
-            gC[ind2t] = (Ca1[np.ix_(ind, index2)] + Ca2c[np.ix_(ind, index2)]*(Ba3c[np.ix_(ind, index2)] - (t1-z)))*upsi1\
-                + Ca4c[np.ix_(ind, index2)]*(ezt_lq2 + ezgamct)\
-                - (Ca1[np.ix_(ind, index2)] + Ca2[np.ix_(ind, index2)]*(Ba3[np.ix_(ind, index2)] - (t1-z)))*upsi2\
-                - Ca4[np.ix_(ind, index2)]*(ezt_lq2 + ezgamt)
+            gC[ind2t] = (
+                (
+                    Ca1[np.ix_(ind, index2)]
+                    + Ca2c[np.ix_(ind, index2)] * (Ba3c[np.ix_(ind, index2)] - (t1 - z))
+                )
+                * upsi1
+                + Ca4c[np.ix_(ind, index2)] * (ezt_lq2 + ezgamct)
+                - (
+                    Ca1[np.ix_(ind, index2)]
+                    + Ca2[np.ix_(ind, index2)] * (Ba3[np.ix_(ind, index2)] - (t1 - z))
+                )
+                * upsi2
+                - Ca4[np.ix_(ind, index2)] * (ezt_lq2 + ezgamt)
+            )
 
         return glq, gSdq, gB, gC
 
-    #TODO: reduce memory usage
-    def _gkfu_z(self, X, index, Z, index2): #Kfu(t,z)
-        index = index.reshape(index.size,)
-        #terms that move along t
+    # TODO: reduce memory usage
+    def _gkfu_z(self, X, index, Z, index2):  # Kfu(t,z)
+        index = index.reshape(
+            index.size,
+        )
+        # terms that move along t
         d = np.unique(index)
         B = self.B[d].values
         C = self.C[d].values
         S = self.W[d, :].values
-        #Index transformation
+        # Index transformation
         indd = np.arange(self.output_dim)
         indd[d] = np.arange(d.size)
         index = indd[index]
-        #Check where wd becomes complex
-        wbool = C*C >= 4.*B
+        # Check where wd becomes complex
+        wbool = C * C >= 4.0 * B
         wbool2 = wbool[index]
         ind2t = np.where(wbool2)
         ind3t = np.where(np.logical_not(wbool2))
-        #t column
+        # t column
         t = X[:, 0].reshape(X.shape[0], 1)
         C = C.reshape(C.size, 1)
         B = B.reshape(B.size, 1)
-        C2 = C*C
-        alpha = .5*C
-        #z row
+        C2 = C * C
+        alpha = 0.5 * C
+        # z row
         z = Z[:, 0].reshape(1, Z.shape[0])
-        index2 = index2.reshape(index2.size,)
+        index2 = index2.reshape(
+            index2.size,
+        )
         lq = self.lengthscale.values.reshape((1, self.rank))
 
-        #kfu = np.empty((t.size, z.size))
+        # kfu = np.empty((t.size, z.size))
         gz = np.empty((t.size, z.size))
         indD = np.arange(B.size)
-        #(1) when wd is real
+        # (1) when wd is real
         if np.any(np.logical_not(wbool)):
-            #Indexes of index and t related to (2)
+            # Indexes of index and t related to (1)
             t1 = t[ind3t]
             ind = index[ind3t]
-            #TODO: Find a better way of doing this
-            #Index transformation
+            # TODO: Find a better way of doing this
+            # Index transformation
             d = np.asarray(np.where(np.logical_not(wbool))[0])
             indd = indD.copy()
             indd[d] = np.arange(d.size)
             ind = indd[ind]
-            #Dx1 terms
-            w = .5*np.sqrt(4.*B[d] - C2[d])
+            # Dx1 terms
+            w = 0.5 * np.sqrt(4.0 * B[d] - C2[d])
             alphad = alpha[d]
-            gam = alphad - 1j*w
-            S_w = S[d]/w
-            S_wpi =S_w*(.5*np.sqrt(np.pi))
-            #DxQ terms
-            c0 = S_wpi*lq #lq*Sdq*sqrt(pi)/(2w)
-            nu = (.5*gam)*lq
+            gam = alphad - 1j * w
+            S_w = S[d] / w
+            S_wpi = S_w * (0.5 * np.sqrt(np.pi))
+            # DxQ terms
+            c0 = S_wpi * lq  # lq*Sdq*sqrt(pi)/(2w)
+            nu = (0.5 * gam) * lq
 
-            #1xM terms
-            z_lq = z/lq[0, index2]
-            z_lq2 = -z_lq*z_lq
-            #NxQ terms
-            t_lq = t1/lq
-            #DxM terms
-            gamt = -gam[ind]*t1
-            #NxM terms
+            # 1xM terms
+            z_lq = z / lq[0, index2]
+            z_lq2 = -z_lq * z_lq
+            # NxQ terms
+            t_lq = t1 / lq
+            # DxM terms
+            gamt = -gam[ind] * t1
+            # NxM terms
             zt_lq = z_lq - t_lq[:, index2]
-            zt_lq2 = -zt_lq*zt_lq
-            #ezt_lq2 = -np.exp(zt_lq2)
+            zt_lq2 = -zt_lq * zt_lq
+            # ezt_lq2 = -np.exp(zt_lq2)
             ezgamt = np.exp(z_lq2 + gamt)
 
             # Upsilon calculations
             fullind = np.ix_(ind, index2)
-            upsi = - np.exp(z_lq2 + gamt + np.log(wofz(1j*(z_lq + nu[fullind]))))
-            tz = t1-z
+            upsi = -np.exp(z_lq2 + gamt + np.log(wofz(1j * (z_lq + nu[fullind]))))
+            tz = t1 - z
             z1 = zt_lq + nu[fullind]
-            indv1 = np.where(z1.real >= 0.)
-            indv2 = np.where(z1.real < 0.)
+            indv1 = np.where(z1.real >= 0.0)
+            indv2 = np.where(z1.real < 0.0)
-            if indv1[0].shape > 0:
+            if indv1[0].size > 0:
-                upsi[indv1] += np.exp(zt_lq2[indv1] + np.log(wofz(1j*z1[indv1])))
+                upsi[indv1] += np.exp(zt_lq2[indv1] + np.log(wofz(1j * z1[indv1])))
-            if indv2[0].shape > 0:
+            if indv2[0].size > 0:
-                nua2 = nu[ind[indv2[0]], index2[indv2[1]]]**2
-                upsi[indv2] += np.exp(nua2 - gam[ind[indv2[0]], 0]*tz[indv2] + np.log(2.))\
-                               - np.exp(zt_lq2[indv2] + np.log(wofz(-1j*z1[indv2])))
-            upsi[t1[:, 0] == 0., :] = 0.
+                nua2 = nu[ind[indv2[0]], index2[indv2[1]]] ** 2
+                upsi[indv2] += np.exp(
+                    nua2 - gam[ind[indv2[0]], 0] * tz[indv2] + np.log(2.0)
+                ) - np.exp(zt_lq2[indv2] + np.log(wofz(-1j * z1[indv2])))
+            upsi[t1[:, 0] == 0.0, :] = 0.0
 
-            #Gradient wrt z
-            za1 = c0*gam
-            #za2 = S_w
-            gz[ind3t] = (za1[np.ix_(ind, index2)]*upsi).imag + S_w[np.ix_(ind, index2)]*ezgamt.imag
+            # Gradient wrt z
+            za1 = c0 * gam
+            # za2 = S_w
+            gz[ind3t] = (za1[np.ix_(ind, index2)] * upsi).imag + S_w[
+                np.ix_(ind, index2)
+            ] * ezgamt.imag
 
-        #(2) when wd is complex
+        # (2) when wd is complex
         if np.any(wbool):
-            #Indexes of index and t related to (2)
+            # Indexes of index and t related to (2)
             t1 = t[ind2t]
             ind = index[ind2t]
-            #Index transformation
+            # Index transformation
             d = np.asarray(np.where(wbool)[0])
             indd = indD.copy()
             indd[d] = np.arange(d.size)
             ind = indd[ind]
-            #Dx1 terms
-            w = .5*np.sqrt(C2[d] - 4.*B[d])
+            # Dx1 terms
+            w = 0.5 * np.sqrt(C2[d] - 4.0 * B[d])
             alphad = alpha[d]
             gam = alphad - w
             gamc = alphad + w
-            #DxQ terms
-            S_w = -S[d]/w #minus is given by j*j
-            S_wpi = S_w*(.25*np.sqrt(np.pi))
-            c0 = S_wpi*lq
-            nu = .5*gam*lq
-            nuc = .5*gamc*lq
+            # DxQ terms
+            S_w = -S[d] / w  # minus is given by j*j
+            S_wpi = S_w * (0.25 * np.sqrt(np.pi))
+            c0 = S_wpi * lq
+            nu = 0.5 * gam * lq
+            nuc = 0.5 * gamc * lq
 
-            #1xM terms
-            z_lq = z/lq[0, index2]
-            z_lq2 = -z_lq*z_lq
-            #Nx1
-            gamt = -gam[ind]*t1
-            gamct = -gamc[ind]*t1
-            #NxQ terms
-            t_lq = t1/lq
-            #NxM terms
+            # 1xM terms
+            z_lq = z / lq[0, index2]
+            z_lq2 = -z_lq * z_lq
+            # Nx1
+            gamt = -gam[ind] * t1
+            gamct = -gamc[ind] * t1
+            # NxQ terms
+            t_lq = t1 / lq
+            # NxM terms
             zt_lq = z_lq - t_lq[:, index2]
             ezgamt = np.exp(z_lq2 + gamt)
             ezgamct = np.exp(z_lq2 + gamct)
 
             # Upsilon calculations
-            zt_lq2 = -zt_lq*zt_lq
+            zt_lq2 = -zt_lq * zt_lq
             fullind = np.ix_(ind, index2)
-            upsi1 = - np.exp(z_lq2 + gamct + np.log(wofz(1j*(z_lq + nuc[fullind])).real))
-            tz = t1-z
+            upsi1 = -np.exp(
+                z_lq2 + gamct + np.log(wofz(1j * (z_lq + nuc[fullind])).real)
+            )
+            tz = t1 - z
             z1 = zt_lq + nuc[fullind]
-            indv1 = np.where(z1 >= 0.)
-            indv2 = np.where(z1 < 0.)
+            indv1 = np.where(z1 >= 0.0)
+            indv2 = np.where(z1 < 0.0)
-            if indv1[0].shape > 0:
+            if indv1[0].size > 0:
-                upsi1[indv1] += np.exp(zt_lq2[indv1] + np.log(wofz(1j*z1[indv1]).real))
+                upsi1[indv1] += np.exp(
+                    zt_lq2[indv1] + np.log(wofz(1j * z1[indv1]).real)
+                )
-            if indv2[0].shape > 0:
+            if indv2[0].size > 0:
-                nuac2 = nuc[ind[indv2[0]], index2[indv2[1]]]**2
-                upsi1[indv2] += np.exp(nuac2 - gamc[ind[indv2[0]], 0]*tz[indv2] + np.log(2.))\
-                               - np.exp(zt_lq2[indv2] + np.log(wofz(-1j*z1[indv2]).real))
-            upsi1[t1[:, 0] == 0., :] = 0.
+                nuac2 = nuc[ind[indv2[0]], index2[indv2[1]]] ** 2
+                upsi1[indv2] += np.exp(
+                    nuac2 - gamc[ind[indv2[0]], 0] * tz[indv2] + np.log(2.0)
+                ) - np.exp(zt_lq2[indv2] + np.log(wofz(-1j * z1[indv2]).real))
+            upsi1[t1[:, 0] == 0.0, :] = 0.0
 
-            upsi2 = - np.exp(z_lq2 + gamt + np.log(wofz(1j*(z_lq + nu[fullind])).real))
+            upsi2 = -np.exp(z_lq2 + gamt + np.log(wofz(1j * (z_lq + nu[fullind])).real))
             z1 = zt_lq + nu[fullind]
-            indv1 = np.where(z1 >= 0.)
-            indv2 = np.where(z1 < 0.)
+            indv1 = np.where(z1 >= 0.0)
+            indv2 = np.where(z1 < 0.0)
-            if indv1[0].shape > 0:
+            if indv1[0].size > 0:
-                upsi2[indv1] += np.exp(zt_lq2[indv1] + np.log(wofz(1j*z1[indv1]).real))
+                upsi2[indv1] += np.exp(
+                    zt_lq2[indv1] + np.log(wofz(1j * z1[indv1]).real)
+                )
-            if indv2[0].shape > 0:
+            if indv2[0].size > 0:
-                nua2 = nu[ind[indv2[0]], index2[indv2[1]]]**2
-                upsi2[indv2] += np.exp(nua2 - gam[ind[indv2[0]], 0]*tz[indv2] + np.log(2.))\
-                               - np.exp(zt_lq2[indv2] + np.log(wofz(-1j*z1[indv2]).real))
-            upsi2[t1[:, 0] == 0., :] = 0.
+                nua2 = nu[ind[indv2[0]], index2[indv2[1]]] ** 2
+                upsi2[indv2] += np.exp(
+                    nua2 - gam[ind[indv2[0]], 0] * tz[indv2] + np.log(2.0)
+                ) - np.exp(zt_lq2[indv2] + np.log(wofz(-1j * z1[indv2]).real))
+            upsi2[t1[:, 0] == 0.0, :] = 0.0
 
-            #Gradient wrt z
-            za1 = c0*gam
-            za1c = c0*gamc
-            za2 = .5*S_w
-            gz[ind2t] = za1c[np.ix_(ind, index2)]*upsi1 - za1[np.ix_(ind, index2)]*upsi2\
-                + za2[np.ix_(ind, index2)]*(ezgamct - ezgamt)
+            # Gradient wrt z
+            za1 = c0 * gam
+            za1c = c0 * gamc
+            za2 = 0.5 * S_w
+            gz[ind2t] = (
+                za1c[np.ix_(ind, index2)] * upsi1
+                - za1[np.ix_(ind, index2)] * upsi2
+                + za2[np.ix_(ind, index2)] * (ezgamct - ezgamt)
+            )
         return gz
diff --git a/GPy/kern/src/todo/eq_ode1.py b/GPy/kern/src/todo/eq_ode1.py
index bf0ca7e4..7104a8e9 100644
--- a/GPy/kern/src/todo/eq_ode1.py
+++ b/GPy/kern/src/todo/eq_ode1.py
@@ -121,7 +121,7 @@ class Eq_ode1(Kernpart):
             target+=self.initial_variance * np.exp(- self.decay * (t1_mat + t2_mat))
 
     def Kdiag(self,index,target):
-        #target += np.diag(self.B)[np.asarray(index,dtype=np.int).flatten()]
+        #target += np.diag(self.B)[np.asarray(index,dtype=int).flatten()]
         pass
     
     def _param_grad_helper(self,dL_dK,X,X2,target):
@@ -203,7 +203,7 @@ class Eq_ode1(Kernpart):
         self._t = X[:, 0]
         if not X.shape[1] == 2:
             raise ValueError('Input matrix for ode1 covariance should have two columns, one containing times, the other output indices')
-        self._index = np.asarray(X[:, 1],dtype=np.int)
+        self._index = np.asarray(X[:, 1],dtype=int)
         # Sort indices so that outputs are in blocks for computational
         # convenience.
         self._order = self._index.argsort()
@@ -220,7 +220,7 @@ class Eq_ode1(Kernpart):
             if not X2.shape[1] == 2:
                 raise ValueError('Input matrix for ode1 covariance should have two columns, one containing times, the other output indices')
             self._t2 = X2[:, 0]
-            self._index2 = np.asarray(X2[:, 1],dtype=np.int)
+            self._index2 = np.asarray(X2[:, 1],dtype=int)
             self._order2 = self._index2.argsort()
             self._index2 = self._index2[self._order2]
             self._t2 = self._t2[self._order2]
diff --git a/GPy/models/sparse_gp_coregionalized_regression.py b/GPy/models/sparse_gp_coregionalized_regression.py
index 2a19d52c..43e782bf 100644
--- a/GPy/models/sparse_gp_coregionalized_regression.py
+++ b/GPy/models/sparse_gp_coregionalized_regression.py
@@ -7,6 +7,7 @@ from ..inference.latent_function_inference import VarDTC
 from .. import kern
 from .. import util
 
+
 class SparseGPCoregionalizedRegression(SparseGP):
     """
     Sparse Gaussian Process model for heteroscedastic multioutput regression
@@ -34,34 +35,68 @@ class SparseGPCoregionalizedRegression(SparseGP):
     :type kernel_name: string
     """
 
-    def __init__(self, X_list, Y_list, Z_list=[], kernel=None, likelihoods_list=None, num_inducing=10, X_variance=None, name='SGPCR',W_rank=1,kernel_name='coreg'):
-
-        #Input and Output
-        X,Y,self.output_index = util.multioutput.build_XY(X_list,Y_list)
+    def __init__(
+        self,
+        X_list,
+        Y_list,
+        Z_list=None,
+        kernel=None,
+        likelihoods_list=None,
+        num_inducing=10,
+        X_variance=None,
+        name="SGPCR",
+        W_rank=1,
+        kernel_name="coreg",
+    ):
+        # Fresh list per call: Z_list is appended to below (no shared default).
+        if Z_list is None:
+            Z_list = []
+        # Input and Output
+        X, Y, self.output_index = util.multioutput.build_XY(X_list, Y_list)
         Ny = len(Y_list)
 
-        #Kernel
+        # Kernel
         if kernel is None:
-            kernel = kern.RBF(X.shape[1]-1)
-            
-            kernel = util.multioutput.ICM(input_dim=X.shape[1]-1, num_outputs=Ny, kernel=kernel, W_rank=W_rank, name=kernel_name)
+            kernel = kern.RBF(X.shape[1] - 1)
 
-        #Likelihood
-        likelihood = util.multioutput.build_likelihood(Y_list,self.output_index,likelihoods_list)
+            kernel = util.multioutput.ICM(
+                input_dim=X.shape[1] - 1,
+                num_outputs=Ny,
+                kernel=kernel,
+                W_rank=W_rank,
+                name=kernel_name,
+            )
 
-        #Inducing inputs list
+        # Likelihood
+        likelihood = util.multioutput.build_likelihood(
+            Y_list, self.output_index, likelihoods_list
+        )
+
+        # Inducing inputs list
         if len(Z_list):
-            assert len(Z_list) == Ny, 'Number of outputs do not match length of inducing inputs list.'
+            assert (
+                len(Z_list) == Ny
+            ), "Number of outputs do not match length of inducing inputs list."
         else:
-            if isinstance(num_inducing,np.int):
+            if isinstance(num_inducing, (int, np.integer)):
                 num_inducing = [num_inducing] * Ny
             num_inducing = np.asarray(num_inducing)
-            assert num_inducing.size == Ny, 'Number of outputs do not match length of inducing inputs list.'
-            for ni,Xi in zip(num_inducing,X_list):
+            assert (
+                num_inducing.size == Ny
+            ), "Number of outputs do not match length of inducing inputs list."
+            for ni, Xi in zip(num_inducing, X_list):
                 i = np.random.permutation(Xi.shape[0])[:ni]
                 Z_list.append(Xi[i].copy())
 
         Z, _, Iz = util.multioutput.build_XY(Z_list)
 
-        super(SparseGPCoregionalizedRegression, self).__init__(X, Y, Z, kernel, likelihood, inference_method=VarDTC(), Y_metadata={'output_index':self.output_index})
-        self['.*inducing'][:,-1].fix()
+        super(SparseGPCoregionalizedRegression, self).__init__(
+            X,
+            Y,
+            Z,
+            kernel,
+            likelihood,
+            inference_method=VarDTC(),
+            Y_metadata={"output_index": self.output_index},
+        )
+        self[".*inducing"][:, -1].fix()
diff --git a/GPy/models/ss_mrd.py b/GPy/models/ss_mrd.py
index 0aa472c7..c4dbec78 100644
--- a/GPy/models/ss_mrd.py
+++ b/GPy/models/ss_mrd.py
@@ -5,52 +5,110 @@ The Maniforld Relevance Determination model with the spike-and-slab prior
 import numpy as np
 from ..core import Model
 from .ss_gplvm import SSGPLVM
-from GPy.core.parameterization.variational import SpikeAndSlabPrior,NormalPosterior,VariationalPrior
+from GPy.core.parameterization.variational import (
+    SpikeAndSlabPrior,
+    NormalPosterior,
+    VariationalPrior,
+)
 from ..util.misc import param_to_array
 from ..kern import RBF
 from ..core import Param
 from numpy.linalg.linalg import LinAlgError
 
+
 class SSMRD(Model):
-    
-    def __init__(self, Ylist, input_dim, X=None, X_variance=None, Gammas=None, initx = 'PCA_concat', initz = 'permute', 
-                 num_inducing=10, Zs=None, kernels=None, inference_methods=None, likelihoods=None, group_spike=True,
-                 pi=0.5, name='ss_mrd', Ynames=None, mpi_comm=None, IBP=False, alpha=2., taus=None, ):
+    def __init__(
+        self,
+        Ylist,
+        input_dim,
+        X=None,
+        X_variance=None,
+        Gammas=None,
+        initx="PCA_concat",
+        initz="permute",
+        num_inducing=10,
+        Zs=None,
+        kernels=None,
+        inference_methods=None,
+        likelihoods=None,
+        group_spike=True,
+        pi=0.5,
+        name="ss_mrd",
+        Ynames=None,
+        mpi_comm=None,
+        IBP=False,
+        alpha=2.0,
+        taus=None,
+    ):
         super(SSMRD, self).__init__(name)
         self.mpi_comm = mpi_comm
         self._PROPAGATE_ = False
-        
+
         # initialize X for individual models
-        X, X_variance, Gammas, fracs = self._init_X(Ylist, input_dim, X, X_variance, Gammas, initx)
+        X, X_variance, Gammas, fracs = self._init_X(
+            Ylist, input_dim, X, X_variance, Gammas, initx
+        )
         self.X = NormalPosterior(means=X, variances=X_variance)
-        
+
         if kernels is None:
-            kernels = [RBF(input_dim, lengthscale=1./fracs, ARD=True) for i in range(len(Ylist))]
+            kernels = [
+                RBF(input_dim, lengthscale=1.0 / fracs, ARD=True)
+                for i in range(len(Ylist))
+            ]
         if Zs is None:
-            Zs = [None]* len(Ylist)
+            Zs = [None] * len(Ylist)
         if likelihoods is None:
-            likelihoods = [None]* len(Ylist)
+            likelihoods = [None] * len(Ylist)
         if inference_methods is None:
-            inference_methods = [None]* len(Ylist)
-        
+            inference_methods = [None] * len(Ylist)
+
         if IBP:
-            self.var_priors = [IBPPrior_SSMRD(len(Ylist),input_dim,alpha=alpha) for i in range(len(Ylist))]
+            self.var_priors = [
+                IBPPrior_SSMRD(len(Ylist), input_dim, alpha=alpha)
+                for i in range(len(Ylist))
+            ]
         else:
-            self.var_priors = [SpikeAndSlabPrior_SSMRD(nModels=len(Ylist),pi=pi,learnPi=False, group_spike=group_spike) for i in range(len(Ylist))]
-        self.models = [SSGPLVM(y, input_dim, X=X.copy(), X_variance=X_variance.copy(), Gamma=Gammas[i], num_inducing=num_inducing,Z=Zs[i], learnPi=False, group_spike=group_spike,
-                               kernel=kernels[i],inference_method=inference_methods[i],likelihood=likelihoods[i], variational_prior=self.var_priors[i], IBP=IBP, tau=None if taus is None else taus[i],
-                               name='model_'+str(i), mpi_comm=mpi_comm, sharedX=True) for i,y in enumerate(Ylist)]
-        self.link_parameters(*(self.models+[self.X]))
-        
+            self.var_priors = [
+                SpikeAndSlabPrior_SSMRD(
+                    nModels=len(Ylist), pi=pi, learnPi=False, group_spike=group_spike
+                )
+                for i in range(len(Ylist))
+            ]
+        self.models = [
+            SSGPLVM(
+                y,
+                input_dim,
+                X=X.copy(),
+                X_variance=X_variance.copy(),
+                Gamma=Gammas[i],
+                num_inducing=num_inducing,
+                Z=Zs[i],
+                learnPi=False,
+                group_spike=group_spike,
+                kernel=kernels[i],
+                inference_method=inference_methods[i],
+                likelihood=likelihoods[i],
+                variational_prior=self.var_priors[i],
+                IBP=IBP,
+                tau=None if taus is None else taus[i],
+                name="model_" + str(i),
+                mpi_comm=mpi_comm,
+                sharedX=True,
+            )
+            for i, y in enumerate(Ylist)
+        ]
+        self.link_parameters(*(self.models + [self.X]))
+
     def _propogate_X_val(self):
-        if self._PROPAGATE_: return
+        if self._PROPAGATE_:
+            return
         for m in self.models:
             m.X.mean.values[:] = self.X.mean.values
             m.X.variance.values[:] = self.X.variance.values
         varp_list = [m.X for m in self.models]
         [vp._update_inernal(varp_list) for vp in self.var_priors]
-        self._PROPAGATE_=True
-    
+        self._PROPAGATE_ = True
+
     def _collate_X_gradient(self):
         self._PROPAGATE_ = False
         self.X.mean.gradient[:] = 0
@@ -58,86 +116,92 @@ class SSMRD(Model):
         for m in self.models:
             self.X.mean.gradient += m.X.mean.gradient
             self.X.variance.gradient += m.X.variance.gradient
-        
+
     def parameters_changed(self):
         super(SSMRD, self).parameters_changed()
         [m.parameters_changed() for m in self.models]
-        self._log_marginal_likelihood = sum([m._log_marginal_likelihood for m in self.models])
+        self._log_marginal_likelihood = sum(
+            [m._log_marginal_likelihood for m in self.models]
+        )
         self._collate_X_gradient()
 
     def log_likelihood(self):
         return self._log_marginal_likelihood
-    
-    def _init_X(self, Ylist, input_dim, X=None, X_variance=None, Gammas=None, initx='PCA_concat'):
-        
+
+    def _init_X(
+        self, Ylist, input_dim, X=None, X_variance=None, Gammas=None, initx="PCA_concat"
+    ):
         # Divide latent dimensions
-        idx = np.empty((input_dim,),dtype=np.int)
-        residue = (input_dim)%(len(Ylist))
+        idx = np.empty((input_dim,), dtype=int)
+        residue = (input_dim) % (len(Ylist))
         for i in range(len(Ylist)):
             if i < residue:
-                size = input_dim/len(Ylist)+1
-                idx[i*size:(i+1)*size] = i
+                size = input_dim // len(Ylist) + 1
+                idx[i * size : (i + 1) * size] = i
             else:
-                size = input_dim/len(Ylist)
-                idx[i*size+residue:(i+1)*size+residue] = i
+                size = input_dim // len(Ylist)
+                idx[i * size + residue : (i + 1) * size + residue] = i
+                idx[i * size + residue : (i + 1) * size + residue] = i
+
         if X is None:
-            if initx == 'PCA_concat':
-                X = np.empty((Ylist[0].shape[0],input_dim))
+            if initx == "PCA_concat":
+                X = np.empty((Ylist[0].shape[0], input_dim))
                 fracs = np.empty((input_dim,))
                 from ..util.initialization import initialize_latent
+
                 for i in range(len(Ylist)):
                     Y = Ylist[i]
-                    dim = (idx==i).sum()
-                    if dim>0:
-                        x, fr = initialize_latent('PCA', dim, Y)
-                        X[:,idx==i] = x
-                        fracs[idx==i] = fr
-            elif initx=='PCA_joint':
+                    dim = (idx == i).sum()
+                    if dim > 0:
+                        x, fr = initialize_latent("PCA", dim, Y)
+                        X[:, idx == i] = x
+                        fracs[idx == i] = fr
+            elif initx == "PCA_joint":
                 y = np.hstack(Ylist)
                 from ..util.initialization import initialize_latent
-                X, fracs = initialize_latent('PCA', input_dim, y)
+
+                X, fracs = initialize_latent("PCA", input_dim, y)
             else:
                 X = np.random.randn(Ylist[0].shape[0], input_dim)
                 fracs = np.ones(input_dim)
         else:
             fracs = np.ones(input_dim)
-            
-    
-        if X_variance is None: # The variance of the variational approximation (S)
-            X_variance = np.random.uniform(0,.1,X.shape)
-            
+
+        if X_variance is None:  # The variance of the variational approximation (S)
+            X_variance = np.random.uniform(0, 0.1, X.shape)
+
         if Gammas is None:
             Gammas = []
             for x in X:
-                gamma = np.empty_like(X) # The posterior probabilities of the binary variable in the variational approximation
+                gamma = np.empty_like(
+                    X
+                )  # The posterior probabilities of the binary variable in the variational approximation
                 gamma[:] = 0.5 + 0.1 * np.random.randn(X.shape[0], input_dim)
-                gamma[gamma>1.-1e-9] = 1.-1e-9
-                gamma[gamma<1e-9] = 1e-9
+                gamma[gamma > 1.0 - 1e-9] = 1.0 - 1e-9
+                gamma[gamma < 1e-9] = 1e-9
                 Gammas.append(gamma)
         return X, X_variance, Gammas, fracs
 
     @Model.optimizer_array.setter
     def optimizer_array(self, p):
         if self.mpi_comm != None:
-            if self._IN_OPTIMIZATION_ and self.mpi_comm.rank==0:
-                self.mpi_comm.Bcast(np.int32(1),root=0)
-            self.mpi_comm.Bcast(p, root=0)        
-        Model.optimizer_array.fset(self,p)
-        
+            if self._IN_OPTIMIZATION_ and self.mpi_comm.rank == 0:
+                self.mpi_comm.Bcast(np.int32(1), root=0)
+            self.mpi_comm.Bcast(p, root=0)
+        Model.optimizer_array.fset(self, p)
+
     def optimize(self, optimizer=None, start=None, **kwargs):
         self._IN_OPTIMIZATION_ = True
-        if self.mpi_comm==None:
-            super(SSMRD, self).optimize(optimizer,start,**kwargs)
-        elif self.mpi_comm.rank==0:
-            super(SSMRD, self).optimize(optimizer,start,**kwargs)
-            self.mpi_comm.Bcast(np.int32(-1),root=0)
-        elif self.mpi_comm.rank>0:
+        if self.mpi_comm == None:
+            super(SSMRD, self).optimize(optimizer, start, **kwargs)
+        elif self.mpi_comm.rank == 0:
+            super(SSMRD, self).optimize(optimizer, start, **kwargs)
+            self.mpi_comm.Bcast(np.int32(-1), root=0)
+        elif self.mpi_comm.rank > 0:
             x = self.optimizer_array.copy()
-            flag = np.empty(1,dtype=np.int32)
+            flag = np.empty(1, dtype=np.int32)
             while True:
-                self.mpi_comm.Bcast(flag,root=0)
-                if flag==1:
+                self.mpi_comm.Bcast(flag, root=0)
+                if flag == 1:
                     try:
                         self.optimizer_array = x
                         self._fail_count = 0
@@ -145,29 +209,51 @@ class SSMRD(Model):
                         if self._fail_count >= self._allowed_failures:
                             raise
                         self._fail_count += 1
-                elif flag==-1:
+                elif flag == -1:
                     break
                 else:
                     self._IN_OPTIMIZATION_ = False
                     raise Exception("Unrecognizable flag for synchronization!")
         self._IN_OPTIMIZATION_ = False
-        
+
 
 class SpikeAndSlabPrior_SSMRD(SpikeAndSlabPrior):
-    def __init__(self, nModels, pi=0.5, learnPi=False, group_spike=True, variance = 1.0, name='SSMRDPrior', **kw):
+    def __init__(
+        self,
+        nModels,
+        pi=0.5,
+        learnPi=False,
+        group_spike=True,
+        variance=1.0,
+        name="SSMRDPrior",
+        **kw
+    ):
         self.nModels = nModels
         self._b_prob_all = 0.5
-        super(SpikeAndSlabPrior_SSMRD, self).__init__(pi=pi,learnPi=learnPi,group_spike=group_spike,variance=variance, name=name, **kw)
-    
+        super(SpikeAndSlabPrior_SSMRD, self).__init__(
+            pi=pi,
+            learnPi=learnPi,
+            group_spike=group_spike,
+            variance=variance,
+            name=name,
+            **kw
+        )
+
     def _update_inernal(self, varp_list):
         """Make an update of the internal status by gathering the variational posteriors for all the individual models."""
         # The probability for the binary variable for the same latent dimension of any of the models is on.
         if self.group_spike:
-            self._b_prob_all = 1.-param_to_array(varp_list[0].gamma_group)
-            [np.multiply(self._b_prob_all, 1.-vp.gamma_group, self._b_prob_all) for vp in varp_list[1:]]
+            self._b_prob_all = 1.0 - param_to_array(varp_list[0].gamma_group)
+            [
+                np.multiply(self._b_prob_all, 1.0 - vp.gamma_group, self._b_prob_all)
+                for vp in varp_list[1:]
+            ]
         else:
-            self._b_prob_all = 1.-param_to_array(varp_list[0].binary_prob)
-            [np.multiply(self._b_prob_all, 1.-vp.binary_prob, self._b_prob_all) for vp in varp_list[1:]]            
+            self._b_prob_all = 1.0 - param_to_array(varp_list[0].binary_prob)
+            [
+                np.multiply(self._b_prob_all, 1.0 - vp.binary_prob, self._b_prob_all)
+                for vp in varp_list[1:]
+            ]
 
     def KL_divergence(self, variational_posterior):
         mu = variational_posterior.mean
@@ -176,16 +262,20 @@ class SpikeAndSlabPrior_SSMRD(SpikeAndSlabPrior):
             gamma = variational_posterior.binary_prob[0]
         else:
             gamma = variational_posterior.binary_prob
-        if len(self.pi.shape)==2:
-            idx = np.unique(gamma._raveled_index()/gamma.shape[-1])
+        if len(self.pi.shape) == 2:
+            idx = np.unique(gamma._raveled_index() // gamma.shape[-1])
             pi = self.pi[idx]
         else:
             pi = self.pi
 
-        var_mean = np.square(mu)/self.variance
-        var_S = (S/self.variance - np.log(S))
-        var_gamma = (gamma*np.log(gamma/pi)).sum()+((1-gamma)*np.log((1-gamma)/(1-pi))).sum()
-        return var_gamma +((1.-self._b_prob_all)*(np.log(self.variance)-1. +var_mean + var_S)).sum()/(2.*self.nModels)
+        var_mean = np.square(mu) / self.variance
+        var_S = S / self.variance - np.log(S)
+        var_gamma = (gamma * np.log(gamma / pi)).sum() + (
+            (1 - gamma) * np.log((1 - gamma) / (1 - pi))
+        ).sum()
+        return var_gamma + (
+            (1.0 - self._b_prob_all) * (np.log(self.variance) - 1.0 + var_mean + var_S)
+        ).sum() / (2.0 * self.nModels)
 
     def update_gradients_KL(self, variational_posterior):
         mu = variational_posterior.mean
@@ -195,63 +285,141 @@ class SpikeAndSlabPrior_SSMRD(SpikeAndSlabPrior):
             gamma = variational_posterior.binary_prob.values[0]
         else:
             gamma = variational_posterior.binary_prob.values
-        if len(self.pi.shape)==2:
-            idx = np.unique(gamma._raveled_index()/gamma.shape[-1])
+        if len(self.pi.shape) == 2:
+            idx = np.unique(gamma._raveled_index() // gamma.shape[-1])  # floor division keeps idx integral for indexing self.pi
             pi = self.pi[idx]
         else:
             pi = self.pi
 
         if self.group_spike:
-            tmp = self._b_prob_all/(1.-gamma)
-            variational_posterior.binary_prob.gradient -= np.log((1-pi)/pi*gamma/(1.-gamma))/N +tmp*((np.square(mu)+S)/self.variance-np.log(S)+np.log(self.variance)-1.)/2.
+            tmp = self._b_prob_all / (1.0 - gamma)
+            variational_posterior.binary_prob.gradient -= (
+                np.log((1 - pi) / pi * gamma / (1.0 - gamma)) / N
+                + tmp
+                * (
+                    (np.square(mu) + S) / self.variance
+                    - np.log(S)
+                    + np.log(self.variance)
+                    - 1.0
+                )
+                / 2.0
+            )
         else:
-            variational_posterior.binary_prob.gradient -= np.log((1-pi)/pi*gamma/(1.-gamma))+((np.square(mu)+S)/self.variance-np.log(S)+np.log(self.variance)-1.)/2.
-        mu.gradient -= (1.-self._b_prob_all)*mu/(self.variance*self.nModels)
-        S.gradient -= (1./self.variance - 1./S) * (1.-self._b_prob_all) /(2.*self.nModels)
+            variational_posterior.binary_prob.gradient -= (
+                np.log((1 - pi) / pi * gamma / (1.0 - gamma))
+                + (
+                    (np.square(mu) + S) / self.variance
+                    - np.log(S)
+                    + np.log(self.variance)
+                    - 1.0
+                )
+                / 2.0
+            )
+        mu.gradient -= (1.0 - self._b_prob_all) * mu / (self.variance * self.nModels)
+        S.gradient -= (
+            (1.0 / self.variance - 1.0 / S)
+            * (1.0 - self._b_prob_all)
+            / (2.0 * self.nModels)
+        )
         if self.learnPi:
-            raise 'Not Supported!'
+            raise NotImplementedError("Not Supported!")  # a bare string is not raisable on Python 3
+
 
 class IBPPrior_SSMRD(VariationalPrior):
-    def __init__(self, nModels, input_dim, alpha =2., tau=None, name='IBPPrior', **kw):
+    def __init__(self, nModels, input_dim, alpha=2.0, tau=None, name="IBPPrior", **kw):
         super(IBPPrior_SSMRD, self).__init__(name=name, **kw)
-        from paramz.transformations import Logexp, __fixed__  
+        from paramz.transformations import Logexp, __fixed__
+
         self.nModels = nModels
         self._b_prob_all = 0.5
         self.input_dim = input_dim
-        self.variance = 1.
-        self.alpha = Param('alpha', alpha, __fixed__)
+        self.variance = 1.0
+        self.alpha = Param("alpha", alpha, __fixed__)
         self.link_parameter(self.alpha)
-        
+
     def _update_inernal(self, varp_list):
         """Make an update of the internal status by gathering the variational posteriors for all the individual models."""
         # The probability for the binary variable for the same latent dimension of any of the models is on.
-        self._b_prob_all = 1.-param_to_array(varp_list[0].gamma_group)
-        [np.multiply(self._b_prob_all, 1.-vp.gamma_group, self._b_prob_all) for vp in varp_list[1:]]
+        self._b_prob_all = 1.0 - param_to_array(varp_list[0].gamma_group)
+        # Fold the remaining models into the running product in place;
+        # the third argument of np.multiply is the output array.
+        for vp in varp_list[1:]:
+            np.multiply(self._b_prob_all, 1.0 - vp.gamma_group, self._b_prob_all)
 
     def KL_divergence(self, variational_posterior):
-        mu, S, gamma, tau = variational_posterior.mean.values, variational_posterior.variance.values, variational_posterior.gamma_group.values, variational_posterior.tau.values
-            
-        var_mean = np.square(mu)/self.variance
-        var_S = (S/self.variance - np.log(S))
-        part1 = ((1.-self._b_prob_all)* (np.log(self.variance)-1. +var_mean + var_S)).sum()/(2.*self.nModels)
-        
-        ad = self.alpha/self.input_dim
-        from scipy.special import betaln,digamma
-        part2 = (gamma*np.log(gamma)).sum() + ((1.-gamma)*np.log(1.-gamma)).sum() + (betaln(ad,1.)*self.input_dim -betaln(tau[:,0], tau[:,1]).sum())/self.nModels \
-                 + (( (tau[:,0]-ad)/self.nModels -gamma)*digamma(tau[:,0])).sum() + \
-                (((tau[:,1]-1.)/self.nModels+gamma-1.)*digamma(tau[:,1])).sum() + (((1.+ad-tau[:,0]-tau[:,1])/self.nModels+1.)*digamma(tau.sum(axis=1))).sum()
-        return part1+part2
+        mu, S, gamma, tau = (
+            variational_posterior.mean.values,
+            variational_posterior.variance.values,
+            variational_posterior.gamma_group.values,
+            variational_posterior.tau.values,
+        )
+
+        var_mean = np.square(mu) / self.variance
+        var_S = S / self.variance - np.log(S)
+        part1 = (
+            (1.0 - self._b_prob_all) * (np.log(self.variance) - 1.0 + var_mean + var_S)
+        ).sum() / (2.0 * self.nModels)
+
+        ad = self.alpha / self.input_dim
+        from scipy.special import betaln, digamma
+
+        part2 = (
+            (gamma * np.log(gamma)).sum()
+            + ((1.0 - gamma) * np.log(1.0 - gamma)).sum()
+            + (betaln(ad, 1.0) * self.input_dim - betaln(tau[:, 0], tau[:, 1]).sum())
+            / self.nModels
+            + (((tau[:, 0] - ad) / self.nModels - gamma) * digamma(tau[:, 0])).sum()
+            + (
+                ((tau[:, 1] - 1.0) / self.nModels + gamma - 1.0) * digamma(tau[:, 1])
+            ).sum()
+            + (
+                ((1.0 + ad - tau[:, 0] - tau[:, 1]) / self.nModels + 1.0)
+                * digamma(tau.sum(axis=1))
+            ).sum()
+        )
+        return part1 + part2
 
     def update_gradients_KL(self, variational_posterior):
-        mu, S, gamma, tau = variational_posterior.mean.values, variational_posterior.variance.values, variational_posterior.gamma_group.values, variational_posterior.tau.values
+        mu, S, gamma, tau = (
+            variational_posterior.mean.values,
+            variational_posterior.variance.values,
+            variational_posterior.gamma_group.values,
+            variational_posterior.tau.values,
+        )
 
-        variational_posterior.mean.gradient -= (1.-self._b_prob_all)*mu/(self.variance*self.nModels)
-        variational_posterior.variance.gradient -= (1./self.variance - 1./S) * (1.-self._b_prob_all) /(2.*self.nModels)
-        from scipy.special import digamma,polygamma
-        tmp = self._b_prob_all/(1.-gamma)
-        dgamma = (np.log(gamma/(1.-gamma))+ digamma(tau[:,1])-digamma(tau[:,0]))/variational_posterior.num_data
-        variational_posterior.binary_prob.gradient -= dgamma+tmp*((np.square(mu)+S)/self.variance-np.log(S)+np.log(self.variance)-1.)/2.
-        ad = self.alpha/self.input_dim
-        common = ((1.+ad-tau[:,0]-tau[:,1])/self.nModels+1.)*polygamma(1,tau.sum(axis=1))
-        variational_posterior.tau.gradient[:,0] = -(((tau[:,0]-ad)/self.nModels -gamma)*polygamma(1,tau[:,0])+common)
-        variational_posterior.tau.gradient[:,1] = -(((tau[:,1]-1.)/self.nModels+gamma-1.)*polygamma(1,tau[:,1])+common)
+        variational_posterior.mean.gradient -= (
+            (1.0 - self._b_prob_all) * mu / (self.variance * self.nModels)
+        )
+        variational_posterior.variance.gradient -= (
+            (1.0 / self.variance - 1.0 / S)
+            * (1.0 - self._b_prob_all)
+            / (2.0 * self.nModels)
+        )
+        from scipy.special import digamma, polygamma
+
+        tmp = self._b_prob_all / (1.0 - gamma)
+        dgamma = (
+            np.log(gamma / (1.0 - gamma)) + digamma(tau[:, 1]) - digamma(tau[:, 0])
+        ) / variational_posterior.num_data
+        variational_posterior.binary_prob.gradient -= (
+            dgamma
+            + tmp
+            * (
+                (np.square(mu) + S) / self.variance
+                - np.log(S)
+                + np.log(self.variance)
+                - 1.0
+            )
+            / 2.0
+        )
+        ad = self.alpha / self.input_dim
+        common = ((1.0 + ad - tau[:, 0] - tau[:, 1]) / self.nModels + 1.0) * polygamma(
+            1, tau.sum(axis=1)
+        )
+        variational_posterior.tau.gradient[:, 0] = -(
+            ((tau[:, 0] - ad) / self.nModels - gamma) * polygamma(1, tau[:, 0]) + common
+        )
+        variational_posterior.tau.gradient[:, 1] = -(
+            ((tau[:, 1] - 1.0) / self.nModels + gamma - 1.0) * polygamma(1, tau[:, 1])
+            + common
+        )
diff --git a/GPy/models/state_space_main.py b/GPy/models/state_space_main.py
index 6ed2fbeb..fb6693ec 100644
--- a/GPy/models/state_space_main.py
+++ b/GPy/models/state_space_main.py
@@ -16,6 +16,7 @@ import warnings
 
 try:
     from . import state_space_setup
+
     setup_available = True
 except ImportError as e:
     setup_available = False
@@ -25,13 +26,14 @@ print_verbose = False
 
 try:
     import state_space_cython
+
     cython_code_available = True
     if print_verbose:
         print("state_space: cython is available")
 except ImportError as e:
     cython_code_available = False
 
-#cython_code_available = False
+# cython_code_available = False
 # Use cython by default
 use_cython = False
 if setup_available:
@@ -49,7 +51,6 @@ tmp_buffer = None
 
 
 class Dynamic_Callables_Python(object):
-
     def f_a(self, k, m, A):
         """
         p_a: function (k, x_{k-1}, A_{k}). Dynamic function.
@@ -113,6 +114,7 @@ class Dynamic_Callables_Python(object):
 
         raise NotImplemented("reset is not implemented!")
 
+
 if use_cython:
     Dynamic_Callables_Class = state_space_cython.Dynamic_Callables_Cython
 else:
@@ -183,9 +185,9 @@ class Measurement_Callables_Python(object):
 
         raise NotImplemented("reset is not implemented!")
 
+
 if use_cython:
-    Measurement_Callables_Class = state_space_cython.\
-        Measurement_Callables_Cython
+    Measurement_Callables_Class = state_space_cython.Measurement_Callables_Cython
 else:
     Measurement_Callables_Class = Measurement_Callables_Python
 
@@ -194,6 +196,7 @@ class R_handling_Python(Measurement_Callables_Class):
     """
     The calss handles noise matrix R.
     """
+
     def __init__(self, R, index, R_time_var_index, unique_R_number, dR=None):
         """
         Input:
@@ -225,7 +228,7 @@ class R_handling_Python(Measurement_Callables_Class):
         self.R_time_var_index = int(R_time_var_index)
         self.dR = dR
 
-        if (len(np.unique(index)) > unique_R_number):
+        if len(np.unique(index)) > unique_R_number:
             self.svd_each_time = True
         else:
             self.svd_each_time = False
@@ -248,32 +251,39 @@ class R_handling_Python(Measurement_Callables_Class):
         ind = int(self.index[self.R_time_var_index, k])
         R = self.R[:, :, ind]
 
-        if (R.shape[0] == 1):  # 1-D case handle simplier. No storage
+        if R.shape[0] == 1:  # 1-D case is simpler to handle. No storage
             # of the result, just compute it each time.
-            inv_square_root = np.sqrt(1.0/R)
+            inv_square_root = np.sqrt(1.0 / R)
         else:
             if self.svd_each_time:
+                (U, S, Vh) = sp.linalg.svd(
+                    R,
+                    full_matrices=False,
+                    compute_uv=True,
+                    overwrite_a=False,
+                    check_finite=True,
+                )
 
-                (U, S, Vh) = sp.linalg.svd(R, full_matrices=False,
-                                           compute_uv=True, overwrite_a=False,
-                                           check_finite=True)
-
-                inv_square_root = U * 1.0/np.sqrt(S)
+                inv_square_root = U * 1.0 / np.sqrt(S)
             else:
                 if ind in self.R_square_root:
                     inv_square_root = self.R_square_root[ind]
                 else:
-                    (U, S, Vh) = sp.linalg.svd(R, full_matrices=False,
-                                               compute_uv=True,
-                                               overwrite_a=False,
-                                               check_finite=True)
+                    (U, S, Vh) = sp.linalg.svd(
+                        R,
+                        full_matrices=False,
+                        compute_uv=True,
+                        overwrite_a=False,
+                        check_finite=True,
+                    )
 
-                    inv_square_root = U * 1.0/np.sqrt(S)
+                    inv_square_root = U * 1.0 / np.sqrt(S)
 
                     self.R_square_root[ind] = inv_square_root
 
         return inv_square_root
 
+
 if use_cython:
     R_handling_Class = state_space_cython.R_handling_Cython
 else:
@@ -281,11 +291,20 @@ else:
 
 
 class Std_Measurement_Callables_Python(R_handling_Class):
-
-    def __init__(self, H, H_time_var_index, R, index, R_time_var_index,
-                 unique_R_number, dH=None, dR=None):
-        super(Std_Measurement_Callables_Python,
-              self).__init__(R, index, R_time_var_index, unique_R_number, dR)
+    def __init__(
+        self,
+        H,
+        H_time_var_index,
+        R,
+        index,
+        R_time_var_index,
+        unique_R_number,
+        dH=None,
+        dR=None,
+    ):
+        super(Std_Measurement_Callables_Python, self).__init__(
+            R, index, R_time_var_index, unique_R_number, dR
+        )
 
         self.H = H
         self.H_time_var_index = int(H_time_var_index)
@@ -319,15 +338,16 @@ class Std_Measurement_Callables_Python(R_handling_Class):
 
         return self.dH  # the same dirivative on each iteration
 
+
 if use_cython:
-    Std_Measurement_Callables_Class = state_space_cython.\
-                                        Std_Measurement_Callables_Cython
+    Std_Measurement_Callables_Class = (
+        state_space_cython.Std_Measurement_Callables_Cython
+    )
 else:
     Std_Measurement_Callables_Class = Std_Measurement_Callables_Python
 
 
 class Q_handling_Python(Dynamic_Callables_Class):
-
     def __init__(self, Q, index, Q_time_var_index, unique_Q_number, dQ=None):
         """
         Input:
@@ -360,7 +380,7 @@ class Q_handling_Python(Dynamic_Callables_Class):
         self.Q_time_var_index = Q_time_var_index
         self.dQ = dQ
 
-        if (len(np.unique(index)) > unique_Q_number):
+        if len(np.unique(index)) > unique_Q_number:
             self.svd_each_time = True
         else:
             self.svd_each_time = False
@@ -391,27 +411,31 @@ class Q_handling_Python(Dynamic_Callables_Class):
         ind = self.index[self.Q_time_var_index, k]
         Q = self.Q[:, :, ind]
 
-        if (Q.shape[0] == 1):  # 1-D case handle simplier. No storage
+        if Q.shape[0] == 1:  # 1-D case is simpler to handle. No storage
             # of the result, just compute it each time.
             square_root = np.sqrt(Q)
         else:
             if self.svd_each_time:
-
-                (U, S, Vh) = sp.linalg.svd(Q, full_matrices=False,
-                                           compute_uv=True,
-                                           overwrite_a=False,
-                                           check_finite=True)
+                (U, S, Vh) = sp.linalg.svd(
+                    Q,
+                    full_matrices=False,
+                    compute_uv=True,
+                    overwrite_a=False,
+                    check_finite=True,
+                )
 
                 square_root = U * np.sqrt(S)
             else:
-
                 if ind in self.Q_square_root:
                     square_root = self.Q_square_root[ind]
                 else:
-                    (U, S, Vh) = sp.linalg.svd(Q, full_matrices=False,
-                                               compute_uv=True,
-                                               overwrite_a=False,
-                                               check_finite=True)
+                    (U, S, Vh) = sp.linalg.svd(
+                        Q,
+                        full_matrices=False,
+                        compute_uv=True,
+                        overwrite_a=False,
+                        check_finite=True,
+                    )
 
                     square_root = U * np.sqrt(S)
 
@@ -419,6 +443,7 @@ class Q_handling_Python(Dynamic_Callables_Class):
 
         return square_root
 
+
 if use_cython:
     Q_handling_Class = state_space_cython.Q_handling_Cython
 else:
@@ -426,11 +451,20 @@ else:
 
 
 class Std_Dynamic_Callables_Python(Q_handling_Class):
-
-    def __init__(self, A, A_time_var_index, Q, index, Q_time_var_index,
-                 unique_Q_number, dA=None, dQ=None):
-        super(Std_Dynamic_Callables_Python,
-              self).__init__(Q, index, Q_time_var_index, unique_Q_number, dQ)
+    def __init__(
+        self,
+        A,
+        A_time_var_index,
+        Q,
+        index,
+        Q_time_var_index,
+        unique_Q_number,
+        dA=None,
+        dQ=None,
+    ):
+        super(Std_Dynamic_Callables_Python, self).__init__(
+            Q, index, Q_time_var_index, unique_Q_number, dQ
+        )
 
         self.A = A
         self.A_time_var_index = np.asarray(A_time_var_index, np.int_)
@@ -438,11 +472,11 @@ class Std_Dynamic_Callables_Python(Q_handling_Class):
 
     def f_a(self, k, m, A):
         """
-            f_a: function (k, x_{k-1}, A_{k}). Dynamic function.
-            k (iteration number), starts at 0
-            x_{k-1} State from the previous step
-            A_{k} Jacobian matrices of f_a. In the linear case it is exactly
-            A_{k}.
+        f_a: function (k, x_{k-1}, A_{k}). Dynamic function.
+        k (iteration number), starts at 0
+        x_{k-1} State from the previous step
+        A_{k} Jacobian matrices of f_a. In the linear case it is exactly
+        A_{k}.
         """
         return np.dot(A, m)
 
@@ -471,16 +505,15 @@ class Std_Dynamic_Callables_Python(Q_handling_Class):
 
         return self
 
+
 if use_cython:
-    Std_Dynamic_Callables_Class = state_space_cython.\
-                                  Std_Dynamic_Callables_Cython
+    Std_Dynamic_Callables_Class = state_space_cython.Std_Dynamic_Callables_Cython
 else:
     Std_Dynamic_Callables_Class = Std_Dynamic_Callables_Python
 
 
 class AddMethodToClass(object):
-
-    def __init__(self, func=None, tp='staticmethod'):
+    def __init__(self, func=None, tp="staticmethod"):
         """
         Input:
         --------------
@@ -495,16 +528,18 @@ class AddMethodToClass(object):
         self.tp = tp
 
     def __get__(self, obj, klass=None, *args, **kwargs):
-
-        if self.tp == 'staticmethod':
+        if self.tp == "staticmethod":
             return self.func
-        elif self.tp == 'normal':
+        elif self.tp == "normal":
+
             def newfunc(obj, *args, **kwargs):
                 return self.func
 
-        elif self.tp == 'classmethod':
+        elif self.tp == "classmethod":
+
             def newfunc(klass, *args, **kwargs):
                 return self.func
+
         return newfunc
 
 
@@ -519,23 +554,24 @@ class DescreteStateSpaceMeta(type):
         """
 
         if use_cython:
-            if '_kalman_prediction_step_SVD' in attributes:
-                attributes['_kalman_prediction_step_SVD'] =\
-                                    AddMethodToClass(state_space_cython.
-                                        _kalman_prediction_step_SVD_Cython)
+            if "_kalman_prediction_step_SVD" in attributes:
+                attributes["_kalman_prediction_step_SVD"] = AddMethodToClass(
+                    state_space_cython._kalman_prediction_step_SVD_Cython
+                )
 
-            if '_kalman_update_step_SVD' in attributes:
-                attributes['_kalman_update_step_SVD'] =\
-                                    AddMethodToClass(state_space_cython.
-                                        _kalman_update_step_SVD_Cython)
+            if "_kalman_update_step_SVD" in attributes:
+                attributes["_kalman_update_step_SVD"] = AddMethodToClass(
+                    state_space_cython._kalman_update_step_SVD_Cython
+                )
 
-            if '_cont_discr_kalman_filter_raw' in attributes:
-                attributes['_cont_discr_kalman_filter_raw'] =\
-                                    AddMethodToClass(state_space_cython.
-                                        _cont_discr_kalman_filter_raw_Cython)
+            if "_cont_discr_kalman_filter_raw" in attributes:
+                attributes["_cont_discr_kalman_filter_raw"] = AddMethodToClass(
+                    state_space_cython._cont_discr_kalman_filter_raw_Cython
+                )
 
-        return super(DescreteStateSpaceMeta,
-                     typeclass).__new__(typeclass, name, bases, attributes)
+        return super(DescreteStateSpaceMeta, typeclass).__new__(
+            typeclass, name, bases, attributes
+        )
 
 
 class DescreteStateSpace(object):
@@ -560,6 +596,7 @@ class DescreteStateSpace(object):
     implementations are very similar.
 
     """
+
     __metaclass__ = DescreteStateSpaceMeta
 
     @staticmethod
@@ -586,37 +623,56 @@ class DescreteStateSpace(object):
                 None.
         """
 
-        if (len(shape) > 3):
-            raise ValueError("""Input array is not supposed to be more
-                                than 3 dimensional.""")
+        if len(shape) > 3:
+            raise ValueError(
+                """Input array is not supposed to be more
+                                than 3 dimensional."""
+            )
 
-        if (len(shape) > desired_dim):
+        if len(shape) > desired_dim:
             raise ValueError("Input array shape is more than desired shape.")
         elif len(shape) == 1:
-            if (desired_dim == 3):
+            if desired_dim == 3:
                 return ((shape[0], 1, 1), shape)  # last dimension is the
                 # time serime_series_no
-            elif (desired_dim == 2):
+            elif desired_dim == 2:
                 return ((shape[0], 1), shape)
 
         elif len(shape) == 2:
-            if (desired_dim == 3):
-                return ((shape[1], 1, 1), shape) if (shape[0] == 1) else\
-                    ((shape[0], shape[1], 1), shape)  # convert to column
-                                                      # vector
-            elif (desired_dim == 2):
-                return ((shape[1], 1), shape) if (shape[0] == 1) else\
-                    ((shape[0], shape[1]), None)  # convert to column vector
+            if desired_dim == 3:
+                return (
+                    ((shape[1], 1, 1), shape)
+                    if (shape[0] == 1)
+                    else ((shape[0], shape[1], 1), shape)
+                )  # convert to column
+                # vector
+            elif desired_dim == 2:
+                return (
+                    ((shape[1], 1), shape)
+                    if (shape[0] == 1)
+                    else ((shape[0], shape[1]), None)
+                )  # convert to column vector
 
         else:  # len(shape) == 3
             return (shape, None)  # do nothing
 
     @classmethod
-    def kalman_filter(cls, p_A, p_Q, p_H, p_R, Y, index=None, m_init=None,
-                      P_init=None, p_kalman_filter_type='regular',
-                      calc_log_likelihood=False,
-                      calc_grad_log_likelihood=False, grad_params_no=None,
-                      grad_calc_params=None):
+    def kalman_filter(
+        cls,
+        p_A,
+        p_Q,
+        p_H,
+        p_R,
+        Y,
+        index=None,
+        m_init=None,
+        P_init=None,
+        p_kalman_filter_type="regular",
+        calc_log_likelihood=False,
+        calc_grad_log_likelihood=False,
+        grad_params_no=None,
+        grad_calc_params=None,
+    ):
         """
         This function implements the basic Kalman Filter algorithm
         These notations for the State-Space model are assumed:
@@ -743,7 +799,7 @@ class DescreteStateSpace(object):
             The dictionary contains the same fields.
         """
 
-        #import pdb; pdb.set_trace()
+        # import pdb; pdb.set_trace()
 
         # Parameters checking ->
         # index
@@ -753,14 +809,16 @@ class DescreteStateSpace(object):
         p_R = np.atleast_1d(p_R)
 
         # Reshape and check measurements:
-        Y.shape, old_Y_shape  = cls._reshape_input_data(Y.shape)
+        Y.shape, old_Y_shape = cls._reshape_input_data(Y.shape)
         measurement_dim = Y.shape[1]
-        time_series_no = Y.shape[2] # multiple time series mode
+        time_series_no = Y.shape[2]  # multiple time series mode
 
-        if ((len(p_A.shape) == 3) and (len(p_A.shape[2]) != 1)) or\
-            ((len(p_Q.shape) == 3) and (len(p_Q.shape[2]) != 1)) or\
-            ((len(p_H.shape) == 3) and (len(p_H.shape[2]) != 1)) or\
-            ((len(p_R.shape) == 3) and (len(p_R.shape[2]) != 1)):
+        if (  # shape[2] is an int: compare it directly, len() on it would raise
+            ((len(p_A.shape) == 3) and (p_A.shape[2] != 1))
+            or ((len(p_Q.shape) == 3) and (p_Q.shape[2] != 1))
+            or ((len(p_H.shape) == 3) and (p_H.shape[2] != 1))
+            or ((len(p_R.shape) == 3) and (p_R.shape[2] != 1))
+        ):
             model_matrices_chage_with_time = True
         else:
             model_matrices_chage_with_time = False
@@ -768,35 +826,55 @@ class DescreteStateSpace(object):
         # Check index
         old_index_shape = None
         if index is None:
-            if (len(p_A.shape) == 3) or (len(p_Q.shape) == 3) or\
-                (len(p_H.shape) == 3) or (len(p_R.shape) == 3):
-                raise ValueError("Parameter index can not be None for time varying matrices (third dimension is present)")
-            else: # matrices do not change in time, so form dummy zero indices.
-                index = np.zeros((1,Y.shape[0]))
+            if (
+                (len(p_A.shape) == 3)
+                or (len(p_Q.shape) == 3)
+                or (len(p_H.shape) == 3)
+                or (len(p_R.shape) == 3)
+            ):
+                raise ValueError(
+                    "Parameter index can not be None for time varying matrices (third dimension is present)"
+                )
+            else:  # matrices do not change in time, so form dummy zero indices.
+                index = np.zeros((1, Y.shape[0]))
         else:
             if len(index.shape) == 1:
-                index.shape = (1,index.shape[0])
+                index.shape = (1, index.shape[0])
                 old_index_shape = (index.shape[0],)
 
-            if (index.shape[1] != Y.shape[0]):
-                raise ValueError("Number of measurements must be equal the number of A_{k}, Q_{k}, H_{k}, R_{k}")
+            if index.shape[1] != Y.shape[0]:
+                raise ValueError(
+                    "Number of measurements must be equal the number of A_{k}, Q_{k}, H_{k}, R_{k}"
+                )
 
-        if (index.shape[0] == 1):
-            A_time_var_index = 0; Q_time_var_index = 0
-            H_time_var_index = 0; R_time_var_index = 0
-        elif (index.shape[0] == 4):
-            A_time_var_index = 0; Q_time_var_index = 1
-            H_time_var_index = 2; R_time_var_index = 3
+        if index.shape[0] == 1:
+            A_time_var_index = 0
+            Q_time_var_index = 0
+            H_time_var_index = 0
+            R_time_var_index = 0
+        elif index.shape[0] == 4:
+            A_time_var_index = 0
+            Q_time_var_index = 1
+            H_time_var_index = 2
+            R_time_var_index = 3
         else:
             raise ValueError("First Dimension of index must be either 1 or 4.")
 
         state_dim = p_A.shape[0]
         # Check and make right shape for model matrices. On exit they all are 3 dimensional. Last dimension
         # correspond to change in time.
-        (p_A, old_A_shape) = cls._check_SS_matrix(p_A, state_dim, measurement_dim, which='A')
-        (p_Q, old_Q_shape) = cls._check_SS_matrix(p_Q, state_dim, measurement_dim, which='Q')
-        (p_H, old_H_shape) = cls._check_SS_matrix(p_H, state_dim, measurement_dim, which='H')
-        (p_R, old_R_shape) = cls._check_SS_matrix(p_R, state_dim, measurement_dim, which='R')
+        (p_A, old_A_shape) = cls._check_SS_matrix(
+            p_A, state_dim, measurement_dim, which="A"
+        )
+        (p_Q, old_Q_shape) = cls._check_SS_matrix(
+            p_Q, state_dim, measurement_dim, which="Q"
+        )
+        (p_H, old_H_shape) = cls._check_SS_matrix(
+            p_H, state_dim, measurement_dim, which="H"
+        )
+        (p_R, old_R_shape) = cls._check_SS_matrix(
+            p_R, state_dim, measurement_dim, which="R"
+        )
 
         # m_init
         if m_init is None:
@@ -807,10 +885,10 @@ class DescreteStateSpace(object):
         # P_init
         if P_init is None:
             P_init = np.eye(state_dim)
-        elif not isinstance(P_init, collections.Iterable): #scalar
-            P_init = P_init*np.eye(state_dim)
+        elif not isinstance(P_init, collections.abc.Iterable):  # scalar
+            P_init = P_init * np.eye(state_dim)
 
-        if p_kalman_filter_type not in ('regular', 'svd'):
+        if p_kalman_filter_type not in ("regular", "svd"):
             raise ValueError("Kalman filer type neither 'regular nor 'svd'.")
 
         # Functions to pass to the kalman_filter algorithm:
@@ -818,27 +896,51 @@ class DescreteStateSpace(object):
         # k - number of Kalman filter iteration
         # m - vector for calculating matrices. Required for EKF. Not used here.
 
-        c_p_A = p_A.copy() # create a copy because this object is passed to the smoother
-        c_p_Q = p_Q.copy() # create a copy because this object is passed to the smoother
-        c_index = index.copy() # create a copy because this object is passed to the smoother
+        c_p_A = (
+            p_A.copy()
+        )  # create a copy because this object is passed to the smoother
+        c_p_Q = (
+            p_Q.copy()
+        )  # create a copy because this object is passed to the smoother
+        c_index = (
+            index.copy()
+        )  # create a copy because this object is passed to the smoother
 
         if calc_grad_log_likelihood:
             if model_matrices_chage_with_time:
-                raise ValueError("When computing likelihood gradient A and Q can not change over time.")
+                raise ValueError(
+                    "When computing likelihood gradient A and Q can not change over time."
+                )
 
-            dA = cls._check_grad_state_matrices(grad_calc_params.get('dA'), state_dim, grad_params_no, which = 'dA')
-            dQ = cls._check_grad_state_matrices(grad_calc_params.get('dQ'), state_dim, grad_params_no, which = 'dQ')
-            dH = cls._check_grad_measurement_matrices(grad_calc_params.get('dH'), state_dim, grad_params_no, measurement_dim, which = 'dH')
-            dR = cls._check_grad_measurement_matrices(grad_calc_params.get('dR'), state_dim, grad_params_no, measurement_dim, which = 'dR')
+            dA = cls._check_grad_state_matrices(
+                grad_calc_params.get("dA"), state_dim, grad_params_no, which="dA"
+            )
+            dQ = cls._check_grad_state_matrices(
+                grad_calc_params.get("dQ"), state_dim, grad_params_no, which="dQ"
+            )
+            dH = cls._check_grad_measurement_matrices(
+                grad_calc_params.get("dH"),
+                state_dim,
+                grad_params_no,
+                measurement_dim,
+                which="dH",
+            )
+            dR = cls._check_grad_measurement_matrices(
+                grad_calc_params.get("dR"),
+                state_dim,
+                grad_params_no,
+                measurement_dim,
+                which="dR",
+            )
 
-            dm_init = grad_calc_params.get('dm_init')
+            dm_init = grad_calc_params.get("dm_init")
             if dm_init is None:
-                 # multiple time series mode. Keep grad_params always as a last dimension
+                # multiple time series mode. Keep grad_params always as a last dimension
                 dm_init = np.zeros((state_dim, time_series_no, grad_params_no))
 
-            dP_init = grad_calc_params.get('dP_init')
+            dP_init = grad_calc_params.get("dP_init")
             if dP_init is None:
-                dP_init = np.zeros((state_dim,state_dim,grad_params_no))
+                dP_init = np.zeros((state_dim, state_dim, grad_params_no))
         else:
             dA = None
             dQ = None
@@ -847,17 +949,33 @@ class DescreteStateSpace(object):
             dm_init = None
             dP_init = None
 
-        dynamic_callables = Std_Dynamic_Callables_Class(c_p_A, A_time_var_index, c_p_Q, c_index, Q_time_var_index, 20, dA, dQ)
-        measurement_callables = Std_Measurement_Callables_Class(p_H, H_time_var_index, p_R, index, R_time_var_index, 20, dH, dR)
+        dynamic_callables = Std_Dynamic_Callables_Class(
+            c_p_A, A_time_var_index, c_p_Q, c_index, Q_time_var_index, 20, dA, dQ
+        )
+        measurement_callables = Std_Measurement_Callables_Class(
+            p_H, H_time_var_index, p_R, index, R_time_var_index, 20, dH, dR
+        )
 
-        (M, P,log_likelihood, grad_log_likelihood, dynamic_callables) = \
-            cls._kalman_algorithm_raw(state_dim, dynamic_callables,
-                                    measurement_callables, Y, m_init,
-                                    P_init, p_kalman_filter_type = p_kalman_filter_type,
-                                    calc_log_likelihood=calc_log_likelihood,
-                                    calc_grad_log_likelihood=calc_grad_log_likelihood,
-                                    grad_params_no=grad_params_no,
-                                    dm_init=dm_init, dP_init=dP_init)
+        (
+            M,
+            P,
+            log_likelihood,
+            grad_log_likelihood,
+            dynamic_callables,
+        ) = cls._kalman_algorithm_raw(
+            state_dim,
+            dynamic_callables,
+            measurement_callables,
+            Y,
+            m_init,
+            P_init,
+            p_kalman_filter_type=p_kalman_filter_type,
+            calc_log_likelihood=calc_log_likelihood,
+            calc_grad_log_likelihood=calc_grad_log_likelihood,
+            grad_params_no=grad_params_no,
+            dm_init=dm_init,
+            dP_init=dP_init,
+        )
 
         # restore shapes so that input parameters are unchenged
         if old_index_shape is not None:
@@ -879,12 +997,23 @@ class DescreteStateSpace(object):
             p_R.shape = old_R_shape
         # Return values
 
-        return (M, P,log_likelihood, grad_log_likelihood, dynamic_callables)
+        return (M, P, log_likelihood, grad_log_likelihood, dynamic_callables)
 
     @classmethod
-    def extended_kalman_filter(cls,p_state_dim, p_a, p_f_A, p_f_Q, p_h, p_f_H, p_f_R, Y, m_init=None,
-                          P_init=None,calc_log_likelihood=False):
-
+    def extended_kalman_filter(
+        cls,
+        p_state_dim,
+        p_a,
+        p_f_A,
+        p_f_Q,
+        p_h,
+        p_f_H,
+        p_f_R,
+        Y,
+        m_init=None,
+        P_init=None,
+        calc_log_likelihood=False,
+    ):
         """
         Extended Kalman Filter
 
@@ -954,83 +1083,95 @@ class DescreteStateSpace(object):
         """
 
         # Y
-        Y.shape, old_Y_shape  =  cls._reshape_input_data(Y.shape)
+        Y.shape, old_Y_shape = cls._reshape_input_data(Y.shape)
 
-         # m_init
+        # m_init
         if m_init is None:
-            m_init = np.zeros((p_state_dim,1))
+            m_init = np.zeros((p_state_dim, 1))
         else:
             m_init = np.atleast_2d(m_init).T
 
         # P_init
         if P_init is None:
             P_init = np.eye(p_state_dim)
-        elif not isinstance(P_init, collections.Iterable): #scalar
-            P_init = P_init*np.eye(p_state_dim)
+        elif not isinstance(P_init, collections.Iterable):  # scalar
+            P_init = P_init * np.eye(p_state_dim)
 
         if p_a is None:
-            p_a = lambda k,m,A: np.dot(A, m)
+            p_a = lambda k, m, A: np.dot(A, m)
 
         old_A_shape = None
-        if not isinstance(p_f_A, types.FunctionType): # not a function but array
+        if not isinstance(p_f_A, types.FunctionType):  # not a function but array
             p_f_A = np.atleast_1d(p_f_A)
             (p_A, old_A_shape) = cls._check_A_matrix(p_f_A)
 
-            p_f_A = lambda k, m, P: p_A[:,:, 0] # make function
+            p_f_A = lambda k, m, P: p_A[:, :, 0]  # make function
         else:
             if p_f_A(1, m_init, P_init).shape[0] != m_init.shape[0]:
                 raise ValueError("p_f_A function returns matrix of wrong size")
 
         old_Q_shape = None
-        if not isinstance(p_f_Q, types.FunctionType): # not a function but array
+        if not isinstance(p_f_Q, types.FunctionType):  # not a function but array
             p_f_Q = np.atleast_1d(p_f_Q)
             (p_Q, old_Q_shape) = cls._check_Q_matrix(p_f_Q)
 
-            p_f_Q = lambda k: p_Q[:,:, 0] # make function
+            p_f_Q = lambda k: p_Q[:, :, 0]  # make function
         else:
             if p_f_Q(1).shape[0] != m_init.shape[0]:
                 raise ValueError("p_f_Q function returns matrix of wrong size")
 
         if p_h is None:
-            lambda k,m,H: np.dot(H, m)
+            lambda k, m, H: np.dot(H, m)
 
         old_H_shape = None
-        if not isinstance(p_f_H, types.FunctionType): # not a function but array
+        if not isinstance(p_f_H, types.FunctionType):  # not a function but array
             p_f_H = np.atleast_1d(p_f_H)
             (p_H, old_H_shape) = cls._check_H_matrix(p_f_H)
 
-            p_f_H = lambda k, m, P: p_H # make function
+            p_f_H = lambda k, m, P: p_H  # make function
         else:
             if p_f_H(1, m_init, P_init).shape[0] != Y.shape[1]:
                 raise ValueError("p_f_H function returns matrix of wrong size")
 
         old_R_shape = None
-        if not isinstance(p_f_R, types.FunctionType): # not a function but array
+        if not isinstance(p_f_R, types.FunctionType):  # not a function but array
             p_f_R = np.atleast_1d(p_f_R)
             (p_R, old_R_shape) = cls._check_H_matrix(p_f_R)
 
-            p_f_R = lambda k: p_R # make function
+            p_f_R = lambda k: p_R  # make function
         else:
             if p_f_R(1).shape[0] != m_init.shape[0]:
                 raise ValueError("p_f_R function returns matrix of wrong size")
 
-#        class dynamic_callables_class(Dynamic_Model_Callables):
-#
-#            Ak =
-#            Qk =
-
+        #        class dynamic_callables_class(Dynamic_Model_Callables):
+        #
+        #            Ak =
+        #            Qk =
 
         class measurement_callables_class(R_handling_Class):
-            def __init__(self,R, index, R_time_var_index, unique_R_number):
-                super(measurement_callables_class,self).__init__(R, index, R_time_var_index, unique_R_number)
+            def __init__(self, R, index, R_time_var_index, unique_R_number):
+                super(measurement_callables_class, self).__init__(
+                    R, index, R_time_var_index, unique_R_number
+                )
 
             Hk = AddMethodToClass(f_H)
             f_h = AddMethodToClass(f_hl)
 
-
-        (M, P,log_likelihood, grad_log_likelihood)  = cls._kalman_algorithm_raw(p_state_dim, p_a, p_f_A, p_f_Q, p_h, p_f_H, p_f_R, Y, m_init,
-                          P_init, calc_log_likelihood,
-                          calc_grad_log_likelihood=False, grad_calc_params=None)
+        (M, P, log_likelihood, grad_log_likelihood) = cls._kalman_algorithm_raw(
+            p_state_dim,
+            p_a,
+            p_f_A,
+            p_f_Q,
+            p_h,
+            p_f_H,
+            p_f_R,
+            Y,
+            m_init,
+            P_init,
+            calc_log_likelihood,
+            calc_grad_log_likelihood=False,
+            grad_calc_params=None,
+        )
 
         if old_Y_shape is not None:
             Y.shape = old_Y_shape
@@ -1050,11 +1191,21 @@ class DescreteStateSpace(object):
         return (M, P)
 
     @classmethod
-    def _kalman_algorithm_raw(cls,state_dim, p_dynamic_callables, p_measurement_callables, Y, m_init,
-                          P_init, p_kalman_filter_type='regular',
-                          calc_log_likelihood=False,
-                          calc_grad_log_likelihood=False, grad_params_no=None,
-                          dm_init=None, dP_init=None):
+    def _kalman_algorithm_raw(
+        cls,
+        state_dim,
+        p_dynamic_callables,
+        p_measurement_callables,
+        Y,
+        m_init,
+        P_init,
+        p_kalman_filter_type="regular",
+        calc_log_likelihood=False,
+        calc_grad_log_likelihood=False,
+        grad_params_no=None,
+        dm_init=None,
+        dP_init=None,
+    ):
         """
         General nonlinear filtering algorithm for inference in the state-space
         model:
@@ -1166,94 +1317,142 @@ class DescreteStateSpace(object):
 
         """
 
-        steps_no = Y.shape[0] # number of steps in the Kalman Filter
-        time_series_no = Y.shape[2] # multiple time series mode
+        steps_no = Y.shape[0]  # number of steps in the Kalman Filter
+        time_series_no = Y.shape[2]  # multiple time series mode
 
         # Allocate space for results
         # Mean estimations. Initial values will be included
-        M = np.empty(((steps_no+1),state_dim,time_series_no))
-        M[0,:,:] = m_init # Initialize mean values
+        M = np.empty(((steps_no + 1), state_dim, time_series_no))
+        M[0, :, :] = m_init  # Initialize mean values
         # Variance estimations. Initial values will be included
-        P = np.empty(((steps_no+1),state_dim,state_dim))
-        P_init = 0.5*( P_init + P_init.T) # symmetrize initial covariance. In some ustable cases this is uiseful
-        P[0,:,:] = P_init # Initialize initial covariance matrix
+        P = np.empty(((steps_no + 1), state_dim, state_dim))
+        P_init = 0.5 * (
+            P_init + P_init.T
+        )  # symmetrize initial covariance. In some unstable cases this is useful
+        P[0, :, :] = P_init  # Initialize initial covariance matrix
 
-        if p_kalman_filter_type == 'svd':
-            (U,S,Vh) = sp.linalg.svd( P_init,full_matrices=False, compute_uv=True,
-                      overwrite_a=False,check_finite=True)
-            S[ (S==0) ] = 1e-17 # allows to run algorithm for singular initial variance
-            P_upd = (P_init, S,U)
+        if p_kalman_filter_type == "svd":
+            (U, S, Vh) = sp.linalg.svd(
+                P_init,
+                full_matrices=False,
+                compute_uv=True,
+                overwrite_a=False,
+                check_finite=True,
+            )
+            S[(S == 0)] = 1e-17  # allows to run algorithm for singular initial variance
+            P_upd = (P_init, S, U)
 
         log_likelihood = 0 if calc_log_likelihood else None
         grad_log_likelihood = 0 if calc_grad_log_likelihood else None
 
-        #setting initial values for derivatives update
+        # setting initial values for derivatives update
         dm_upd = dm_init
         dP_upd = dP_init
         # Main loop of the Kalman filter
-        for k in range(0,steps_no):
+        for k in range(0, steps_no):
             # In this loop index for new estimations is (k+1), old - (k)
             # This happened because initial values are stored at 0-th index.
 
-            prev_mean = M[k,:,:] # mean from the previous step
+            prev_mean = M[k, :, :]  # mean from the previous step
 
-            if p_kalman_filter_type == 'svd':
-                m_pred, P_pred, dm_pred, dP_pred = \
-                cls._kalman_prediction_step_SVD(k, prev_mean ,P_upd, p_dynamic_callables,
+            if p_kalman_filter_type == "svd":
+                m_pred, P_pred, dm_pred, dP_pred = cls._kalman_prediction_step_SVD(
+                    k,
+                    prev_mean,
+                    P_upd,
+                    p_dynamic_callables,
                     calc_grad_log_likelihood=calc_grad_log_likelihood,
-                    p_dm = dm_upd, p_dP = dP_upd)
+                    p_dm=dm_upd,
+                    p_dP=dP_upd,
+                )
             else:
-                m_pred, P_pred, dm_pred, dP_pred = \
-                cls._kalman_prediction_step(k, prev_mean ,P[k,:,:], p_dynamic_callables,
+                m_pred, P_pred, dm_pred, dP_pred = cls._kalman_prediction_step(
+                    k,
+                    prev_mean,
+                    P[k, :, :],
+                    p_dynamic_callables,
                     calc_grad_log_likelihood=calc_grad_log_likelihood,
-                    p_dm = dm_upd, p_dP = dP_upd )
+                    p_dm=dm_upd,
+                    p_dP=dP_upd,
+                )
 
-            k_measurment = Y[k,:,:]
+            k_measurment = Y[k, :, :]
 
-            if (np.any(np.isnan(k_measurment)) == False):
-                if p_kalman_filter_type == 'svd':
-                    m_upd, P_upd, log_likelihood_update, dm_upd, dP_upd, d_log_likelihood_update = \
-                    cls._kalman_update_step_SVD(k,  m_pred , P_pred, p_measurement_callables,
-                            k_measurment, calc_log_likelihood=calc_log_likelihood,
-                            calc_grad_log_likelihood=calc_grad_log_likelihood,
-                            p_dm = dm_pred, p_dP = dP_pred )
+            if np.any(np.isnan(k_measurment)) == False:
+                if p_kalman_filter_type == "svd":
+                    (
+                        m_upd,
+                        P_upd,
+                        log_likelihood_update,
+                        dm_upd,
+                        dP_upd,
+                        d_log_likelihood_update,
+                    ) = cls._kalman_update_step_SVD(
+                        k,
+                        m_pred,
+                        P_pred,
+                        p_measurement_callables,
+                        k_measurment,
+                        calc_log_likelihood=calc_log_likelihood,
+                        calc_grad_log_likelihood=calc_grad_log_likelihood,
+                        p_dm=dm_pred,
+                        p_dP=dP_pred,
+                    )
 
-
-    #                m_upd, P_upd, log_likelihood_update, dm_upd, dP_upd, d_log_likelihood_update = \
-    #                cls._kalman_update_step(k,  m_pred , P_pred[0], f_h, f_H, p_R.f_R, k_measurment,
-    #                        calc_log_likelihood=calc_log_likelihood,
-    #                        calc_grad_log_likelihood=calc_grad_log_likelihood,
-    #                        p_dm = dm_pred, p_dP = dP_pred, grad_calc_params_2 = (dH, dR))
-    #
-    #                (U,S,Vh) = sp.linalg.svd( P_upd,full_matrices=False, compute_uv=True,
-    #                      overwrite_a=False,check_finite=True)
-    #                P_upd = (P_upd, S,U)
+                #                m_upd, P_upd, log_likelihood_update, dm_upd, dP_upd, d_log_likelihood_update = \
+                #                cls._kalman_update_step(k,  m_pred , P_pred[0], f_h, f_H, p_R.f_R, k_measurment,
+                #                        calc_log_likelihood=calc_log_likelihood,
+                #                        calc_grad_log_likelihood=calc_grad_log_likelihood,
+                #                        p_dm = dm_pred, p_dP = dP_pred, grad_calc_params_2 = (dH, dR))
+                #
+                #                (U,S,Vh) = sp.linalg.svd( P_upd,full_matrices=False, compute_uv=True,
+                #                      overwrite_a=False,check_finite=True)
+                #                P_upd = (P_upd, S,U)
                 else:
-                    m_upd, P_upd, log_likelihood_update, dm_upd, dP_upd, d_log_likelihood_update = \
-                    cls._kalman_update_step(k,  m_pred , P_pred, p_measurement_callables, k_measurment,
-                            calc_log_likelihood=calc_log_likelihood,
-                            calc_grad_log_likelihood=calc_grad_log_likelihood,
-                            p_dm = dm_pred, p_dP = dP_pred )
+                    (
+                        m_upd,
+                        P_upd,
+                        log_likelihood_update,
+                        dm_upd,
+                        dP_upd,
+                        d_log_likelihood_update,
+                    ) = cls._kalman_update_step(
+                        k,
+                        m_pred,
+                        P_pred,
+                        p_measurement_callables,
+                        k_measurment,
+                        calc_log_likelihood=calc_log_likelihood,
+                        calc_grad_log_likelihood=calc_grad_log_likelihood,
+                        p_dm=dm_pred,
+                        p_dP=dP_pred,
+                    )
 
             else:
-#                if k_measurment.shape != (1,1):
-#                    raise ValueError("Nan measurements are currently not supported for \
-#                                     multidimensional output and multiple time series.")
-#                else:
-#                    m_upd = m_pred; P_upd = P_pred; dm_upd = dm_pred; dP_upd = dP_pred
-#                    log_likelihood_update = 0.0;
-#                    d_log_likelihood_update = 0.0;
+                #                if k_measurment.shape != (1,1):
+                #                    raise ValueError("Nan measurements are currently not supported for \
+                #                                     multidimensional output and multiple time series.")
+                #                else:
+                #                    m_upd = m_pred; P_upd = P_pred; dm_upd = dm_pred; dP_upd = dP_pred
+                #                    log_likelihood_update = 0.0;
+                #                    d_log_likelihood_update = 0.0;
 
                 if not np.all(np.isnan(k_measurment)):
-                    raise ValueError("""Nan measurements are currently not supported if
-                                     they are intermixed with not NaN measurements""")
+                    raise ValueError(
+                        """Nan measurements are currently not supported if
+                                     they are intermixed with not NaN measurements"""
+                    )
                 else:
-                    m_upd = m_pred; P_upd = P_pred; dm_upd = dm_pred; dP_upd = dP_pred
+                    m_upd = m_pred
+                    P_upd = P_pred
+                    dm_upd = dm_pred
+                    dP_upd = dP_pred
                     if calc_log_likelihood:
                         log_likelihood_update = np.zeros((time_series_no,))
                     if calc_grad_log_likelihood:
-                        d_log_likelihood_update = np.zeros((grad_params_no,time_series_no))
-
+                        d_log_likelihood_update = np.zeros(
+                            (grad_params_no, time_series_no)
+                        )
 
             if calc_log_likelihood:
                 log_likelihood += log_likelihood_update
@@ -1261,20 +1460,33 @@ class DescreteStateSpace(object):
             if calc_grad_log_likelihood:
                 grad_log_likelihood += d_log_likelihood_update
 
-            M[k+1,:,:] = m_upd # separate mean value for each time series
+            M[k + 1, :, :] = m_upd  # separate mean value for each time series
 
-            if p_kalman_filter_type == 'svd':
-                P[k+1,:,:] = P_upd[0]
+            if p_kalman_filter_type == "svd":
+                P[k + 1, :, :] = P_upd[0]
             else:
-                P[k+1,:,:] = P_upd
+                P[k + 1, :, :] = P_upd
 
         # !!!Print statistics! Print sizes of matrices
         # !!!Print statistics! Print iteration time base on another boolean variable
-        return (M, P, log_likelihood, grad_log_likelihood, p_dynamic_callables.reset(False))
+        return (
+            M,
+            P,
+            log_likelihood,
+            grad_log_likelihood,
+            p_dynamic_callables.reset(False),
+        )
 
     @staticmethod
-    def _kalman_prediction_step(k, p_m , p_P, p_dyn_model_callable, calc_grad_log_likelihood=False,
-                                p_dm = None, p_dP = None):
+    def _kalman_prediction_step(
+        k,
+        p_m,
+        p_P,
+        p_dyn_model_callable,
+        calc_grad_log_likelihood=False,
+        p_dm=None,
+        p_dP=None,
+    ):
         """
         Desctrete prediction function
 
@@ -1315,17 +1527,23 @@ class DescreteStateSpace(object):
         """
 
         # index correspond to values from previous iteration.
-        A = p_dyn_model_callable.Ak(k,p_m,p_P) # state transition matrix (or Jacobian)
-        Q = p_dyn_model_callable.Qk(k) # state noise matrix
+        A = p_dyn_model_callable.Ak(
+            k, p_m, p_P
+        )  # state transition matrix (or Jacobian)
+        Q = p_dyn_model_callable.Qk(k)  # state noise matrix
 
         # Prediction step ->
-        m_pred = p_dyn_model_callable.f_a(k, p_m, A) # predicted mean
-        P_pred = A.dot(p_P).dot(A.T) + Q # predicted variance
+        m_pred = p_dyn_model_callable.f_a(k, p_m, A)  # predicted mean
+        P_pred = A.dot(p_P).dot(A.T) + Q  # predicted variance
         # Prediction step <-
 
         if calc_grad_log_likelihood:
-            dA_all_params = p_dyn_model_callable.dAk(k) # derivatives of A wrt parameters
-            dQ_all_params = p_dyn_model_callable.dQk(k) # derivatives of Q wrt parameters
+            dA_all_params = p_dyn_model_callable.dAk(
+                k
+            )  # derivatives of A wrt parameters
+            dQ_all_params = p_dyn_model_callable.dQk(
+                k
+            )  # derivatives of Q wrt parameters
 
             param_number = p_dP.shape[2]
 
@@ -1334,19 +1552,21 @@ class DescreteStateSpace(object):
             dP_pred = np.empty(p_dP.shape)
 
             for j in range(param_number):
-                dA = dA_all_params[:,:,j]
-                dQ = dQ_all_params[:,:,j]
+                dA = dA_all_params[:, :, j]
+                dQ = dQ_all_params[:, :, j]
 
-                dP = p_dP[:,:,j]
-                dm = p_dm[:,:,j]
-                dm_pred[:,:,j] = np.dot(dA, p_m) + np.dot(A, dm)
+                dP = p_dP[:, :, j]
+                dm = p_dm[:, :, j]
+                dm_pred[:, :, j] = np.dot(dA, p_m) + np.dot(A, dm)
                 # prediction step derivatives for current parameter:
 
-                dP_pred[:,:,j] = np.dot( dA ,np.dot(p_P, A.T))
-                dP_pred[:,:,j] += dP_pred[:,:,j].T
-                dP_pred[:,:,j] += np.dot( A ,np.dot(dP, A.T)) + dQ
+                dP_pred[:, :, j] = np.dot(dA, np.dot(p_P, A.T))
+                dP_pred[:, :, j] += dP_pred[:, :, j].T
+                dP_pred[:, :, j] += np.dot(A, np.dot(dP, A.T)) + dQ
 
-                dP_pred[:,:,j] = 0.5*(dP_pred[:,:,j] + dP_pred[:,:,j].T) #symmetrize
+                dP_pred[:, :, j] = 0.5 * (
+                    dP_pred[:, :, j] + dP_pred[:, :, j].T
+                )  # symmetrize
         else:
             dm_pred = None
             dP_pred = None
@@ -1354,8 +1574,15 @@ class DescreteStateSpace(object):
         return m_pred, P_pred, dm_pred, dP_pred
 
     @staticmethod
-    def _kalman_prediction_step_SVD(k, p_m , p_P, p_dyn_model_callable, calc_grad_log_likelihood=False,
-                                p_dm = None, p_dP = None):
+    def _kalman_prediction_step_SVD(
+        k,
+        p_m,
+        p_P,
+        p_dyn_model_callable,
+        calc_grad_log_likelihood=False,
+        p_dm=None,
+        p_dP=None,
+    ):
         """
         Desctrete prediction function
 
@@ -1398,33 +1625,46 @@ class DescreteStateSpace(object):
         # covariance from the previous step and its SVD decomposition
         # p_prev_cov = v * S * V.T
         Prev_cov, S_old, V_old = p_P
-        #p_prev_cov_tst = np.dot(p_V, (p_S * p_V).T) # reconstructed covariance from the previous step
+        # p_prev_cov_tst = np.dot(p_V, (p_S * p_V).T) # reconstructed covariance from the previous step
 
         # index correspond to values from previous iteration.
-        A = p_dyn_model_callable.Ak(k,p_m,Prev_cov) # state transition matrix (or Jacobian)
-        Q = p_dyn_model_callable.Qk(k) # state noise matrx. This is necessary for the square root calculation (next step)
+        A = p_dyn_model_callable.Ak(
+            k, p_m, Prev_cov
+        )  # state transition matrix (or Jacobian)
+        Q = p_dyn_model_callable.Qk(
+            k
+        )  # state noise matrix. This is necessary for the square root calculation (next step)
         Q_sr = p_dyn_model_callable.Q_srk(k)
         # Prediction step ->
-        m_pred = p_dyn_model_callable.f_a(k, p_m, A) # predicted mean
+        m_pred = p_dyn_model_callable.f_a(k, p_m, A)  # predicted mean
 
         # coavariance prediction have changed:
-        svd_1_matr = np.vstack( ( (np.sqrt(S_old)* np.dot(A,V_old)).T , Q_sr.T) )
-        (U,S,Vh) = sp.linalg.svd( svd_1_matr,full_matrices=False, compute_uv=True,
-                      overwrite_a=False,check_finite=True)
+        svd_1_matr = np.vstack(((np.sqrt(S_old) * np.dot(A, V_old)).T, Q_sr.T))
+        (U, S, Vh) = sp.linalg.svd(
+            svd_1_matr,
+            full_matrices=False,
+            compute_uv=True,
+            overwrite_a=False,
+            check_finite=True,
+        )
 
         # predicted variance computed by the regular method. For testing
-        #P_pred_tst = A.dot(Prev_cov).dot(A.T) + Q
+        # P_pred_tst = A.dot(Prev_cov).dot(A.T) + Q
         V_new = Vh.T
         S_new = S**2
 
-        P_pred = np.dot(V_new * S_new, V_new.T) # prediction covariance
+        P_pred = np.dot(V_new * S_new, V_new.T)  # prediction covariance
         P_pred = (P_pred, S_new, Vh.T)
         # Prediction step <-
 
         # derivatives
         if calc_grad_log_likelihood:
-            dA_all_params = p_dyn_model_callable.dAk(k) # derivatives of A wrt parameters
-            dQ_all_params = p_dyn_model_callable.dQk(k) # derivatives of Q wrt parameters
+            dA_all_params = p_dyn_model_callable.dAk(
+                k
+            )  # derivatives of A wrt parameters
+            dQ_all_params = p_dyn_model_callable.dQk(
+                k
+            )  # derivatives of Q wrt parameters
 
             param_number = p_dP.shape[2]
 
@@ -1433,20 +1673,21 @@ class DescreteStateSpace(object):
             dP_pred = np.empty(p_dP.shape)
 
             for j in range(param_number):
-                dA = dA_all_params[:,:,j]
-                dQ = dQ_all_params[:,:,j]
+                dA = dA_all_params[:, :, j]
+                dQ = dQ_all_params[:, :, j]
 
-                #dP = p_dP[:,:,j]
-                #dm = p_dm[:,:,j]
-                dm_pred[:,:,j] = np.dot(dA, p_m) + np.dot(A, p_dm[:,:,j])
+                # dP = p_dP[:,:,j]
+                # dm = p_dm[:,:,j]
+                dm_pred[:, :, j] = np.dot(dA, p_m) + np.dot(A, p_dm[:, :, j])
                 # prediction step derivatives for current parameter:
 
+                dP_pred[:, :, j] = np.dot(dA, np.dot(Prev_cov, A.T))
+                dP_pred[:, :, j] += dP_pred[:, :, j].T
+                dP_pred[:, :, j] += np.dot(A, np.dot(p_dP[:, :, j], A.T)) + dQ
 
-                dP_pred[:,:,j] = np.dot( dA ,np.dot(Prev_cov, A.T))
-                dP_pred[:,:,j] += dP_pred[:,:,j].T
-                dP_pred[:,:,j] += np.dot( A ,np.dot(p_dP[:,:,j], A.T)) + dQ
-
-                dP_pred[:,:,j] = 0.5*(dP_pred[:,:,j] + dP_pred[:,:,j].T) #symmetrize
+                dP_pred[:, :, j] = 0.5 * (
+                    dP_pred[:, :, j] + dP_pred[:, :, j].T
+                )  # symmetrize
         else:
             dm_pred = None
             dP_pred = None
@@ -1454,8 +1695,17 @@ class DescreteStateSpace(object):
         return m_pred, P_pred, dm_pred, dP_pred
 
     @staticmethod
-    def _kalman_update_step(k,   p_m , p_P, p_meas_model_callable, measurement, calc_log_likelihood= False,
-                            calc_grad_log_likelihood=False, p_dm = None, p_dP = None):
+    def _kalman_update_step(
+        k,
+        p_m,
+        p_P,
+        p_meas_model_callable,
+        measurement,
+        calc_log_likelihood=False,
+        calc_grad_log_likelihood=False,
+        p_dm=None,
+        p_dP=None,
+    ):
         """
         Input:
 
@@ -1507,45 +1757,54 @@ class DescreteStateSpace(object):
             adds extra columns to the gradient.
 
         """
-        #import pdb; pdb.set_trace()
+        # import pdb; pdb.set_trace()
 
-        m_pred = p_m # from prediction step
-        P_pred = p_P # from prediction step
+        m_pred = p_m  # from prediction step
+        P_pred = p_P  # from prediction step
 
         H = p_meas_model_callable.Hk(k, m_pred, P_pred)
         R = p_meas_model_callable.Rk(k)
 
-        time_series_no = p_m.shape[1] # number of time serieses
+        time_series_no = p_m.shape[1]  # number of time series
 
-        log_likelihood_update=None; dm_upd=None; dP_upd=None; d_log_likelihood_update=None
+        log_likelihood_update = None
+        dm_upd = None
+        dP_upd = None
+        d_log_likelihood_update = None
         # Update step (only if there is data)
-        #if not np.any(np.isnan(measurement)): # TODO: if some dimensions are missing, do properly computations for other.
-        v = measurement-p_meas_model_callable.f_h(k, m_pred, H)
+        # if not np.any(np.isnan(measurement)): # TODO: if some dimensions are missing, do properly computations for other.
+        v = measurement - p_meas_model_callable.f_h(k, m_pred, H)
         S = H.dot(P_pred).dot(H.T) + R
-        if measurement.shape[0]==1: # measurements are one dimensional
-            if (S < 0):
-                raise ValueError("Kalman Filter Update: S is negative step %i" % k )
-                 #import pdb; pdb.set_trace()
+        if measurement.shape[0] == 1:  # measurements are one dimensional
+            if S < 0:
+                raise ValueError("Kalman Filter Update: S is negative step %i" % k)
+                # import pdb; pdb.set_trace()
 
             K = P_pred.dot(H.T) / S
             if calc_log_likelihood:
-                log_likelihood_update = -0.5 * ( np.log(2*np.pi) + np.log(S) +
-                                    v*v / S)
-                #log_likelihood_update = log_likelihood_update[0,0] # to make int
-                if np.any(np.isnan(log_likelihood_update)): # some member in P_pred is None.
+                log_likelihood_update = -0.5 * (
+                    np.log(2 * np.pi) + np.log(S) + v * v / S
+                )
+                # log_likelihood_update = log_likelihood_update[0,0] # to make int
+                if np.any(
+                    np.isnan(log_likelihood_update)
+                ):  # some member in P_pred is None.
                     raise ValueError("Nan values in likelihood update!")
-            LL = None; islower = None
+            LL = None
+            islower = None
         else:
-            LL,islower = linalg.cho_factor(S)
-            K = linalg.cho_solve((LL,islower), H.dot(P_pred.T)).T
+            LL, islower = linalg.cho_factor(S)
+            K = linalg.cho_solve((LL, islower), H.dot(P_pred.T)).T
 
             if calc_log_likelihood:
-                log_likelihood_update = -0.5 * ( v.shape[0]*np.log(2*np.pi) +
-                    2*np.sum( np.log(np.diag(LL)) ) +\
-                        np.sum((linalg.cho_solve((LL,islower),v)) * v, axis = 0) ) # diagonal of v.T*S^{-1}*v
+                log_likelihood_update = -0.5 * (
+                    v.shape[0] * np.log(2 * np.pi)
+                    + 2 * np.sum(np.log(np.diag(LL)))
+                    + np.sum((linalg.cho_solve((LL, islower), v)) * v, axis=0)
+                )  # diagonal of v.T*S^{-1}*v
 
         if calc_grad_log_likelihood:
-            dm_pred_all_params = p_dm # derivativas of the prediction phase
+            dm_pred_all_params = p_dm  # derivatives of the prediction phase
             dP_pred_all_params = p_dP
 
             param_number = p_dP.shape[2]
@@ -1556,75 +1815,95 @@ class DescreteStateSpace(object):
             dm_upd = np.empty(dm_pred_all_params.shape)
             dP_upd = np.empty(dP_pred_all_params.shape)
 
-             # firts dimension parameter_no, second - time series number
-            d_log_likelihood_update = np.empty((param_number,time_series_no))
+            # first dimension - parameter_no, second - time series number
+            d_log_likelihood_update = np.empty((param_number, time_series_no))
             for param in range(param_number):
+                dH = dH_all_params[:, :, param]
+                dR = dR_all_params[:, :, param]
 
-               dH = dH_all_params[:,:,param]
-               dR = dR_all_params[:,:,param]
-
-               dm_pred = dm_pred_all_params[:,:,param]
-               dP_pred = dP_pred_all_params[:,:,param]
+                dm_pred = dm_pred_all_params[:, :, param]
+                dP_pred = dP_pred_all_params[:, :, param]
 
                 # Terms in the likelihood derivatives
-               dv = - np.dot( dH, m_pred) -  np.dot( H, dm_pred)
-               dS = np.dot(dH, np.dot( P_pred, H.T))
-               dS += dS.T
-               dS += np.dot(H, np.dot( dP_pred, H.T)) + dR
+                dv = -np.dot(dH, m_pred) - np.dot(H, dm_pred)
+                dS = np.dot(dH, np.dot(P_pred, H.T))
+                dS += dS.T
+                dS += np.dot(H, np.dot(dP_pred, H.T)) + dR
 
-               # TODO: maybe symmetrize dS
+                # TODO: maybe symmetrize dS
 
-               #dm and dP for the next stem
-               if LL is not None: # the state vector is not a scalar
-                   tmp1 = linalg.cho_solve((LL,islower), H).T
-                   tmp2 = linalg.cho_solve((LL,islower), dH).T
-                   tmp3 = linalg.cho_solve((LL,islower), dS).T
-               else: # the state vector is a scalar
-                   tmp1 = H.T / S
-                   tmp2 = dH.T / S
-                   tmp3 = dS.T / S
+                # dm and dP for the next step
+                if LL is not None:  # the state vector is not a scalar
+                    tmp1 = linalg.cho_solve((LL, islower), H).T
+                    tmp2 = linalg.cho_solve((LL, islower), dH).T
+                    tmp3 = linalg.cho_solve((LL, islower), dS).T
+                else:  # the state vector is a scalar
+                    tmp1 = H.T / S
+                    tmp2 = dH.T / S
+                    tmp3 = dS.T / S
 
-               dK = np.dot( dP_pred, tmp1) + np.dot( P_pred, tmp2) - \
-                    np.dot( P_pred, np.dot( tmp1, tmp3 ) )
+                dK = (
+                    np.dot(dP_pred, tmp1)
+                    + np.dot(P_pred, tmp2)
+                    - np.dot(P_pred, np.dot(tmp1, tmp3))
+                )
 
                 # terms required for the next step, save this for each parameter
-               dm_upd[:,:,param] = dm_pred + np.dot(dK, v) + np.dot(K, dv)
+                dm_upd[:, :, param] = dm_pred + np.dot(dK, v) + np.dot(K, dv)
 
-               dP_upd[:,:,param] = -np.dot(dK, np.dot(S, K.T))
-               dP_upd[:,:,param] += dP_upd[:,:,param].T
-               dP_upd[:,:,param] += dP_pred - np.dot(K , np.dot( dS, K.T))
+                dP_upd[:, :, param] = -np.dot(dK, np.dot(S, K.T))
+                dP_upd[:, :, param] += dP_upd[:, :, param].T
+                dP_upd[:, :, param] += dP_pred - np.dot(K, np.dot(dS, K.T))
 
-               dP_upd[:,:,param] = 0.5*(dP_upd[:,:,param] + dP_upd[:,:,param].T) #symmetrize
+                dP_upd[:, :, param] = 0.5 * (
+                    dP_upd[:, :, param] + dP_upd[:, :, param].T
+                )  # symmetrize
                 # computing the likelihood change for each parameter:
-               if LL is not None: # the state vector is not 1D
-                    #tmp4 = linalg.cho_solve((LL,islower), dv)
-                   tmp5 = linalg.cho_solve((LL,islower), v)
-               else: # the state vector is a scalar
-                   #tmp4 = dv / S
-                   tmp5 = v / S
+                if LL is not None:  # the state vector is not 1D
+                    # tmp4 = linalg.cho_solve((LL,islower), dv)
+                    tmp5 = linalg.cho_solve((LL, islower), v)
+                else:  # the state vector is a scalar
+                    # tmp4 = dv / S
+                    tmp5 = v / S
 
-
-               d_log_likelihood_update[param,:] = -(0.5*np.sum(np.diag(tmp3)) + \
-                    np.sum(tmp5*dv, axis=0) - 0.5 * np.sum(tmp5 * np.dot(dS, tmp5), axis=0) )
+                d_log_likelihood_update[param, :] = -(
+                    0.5 * np.sum(np.diag(tmp3))
+                    + np.sum(tmp5 * dv, axis=0)
+                    - 0.5 * np.sum(tmp5 * np.dot(dS, tmp5), axis=0)
+                )
                 # Before
-                #d_log_likelihood_update[param,0] = -(0.5*np.sum(np.diag(tmp3)) + \
-                #np.dot(tmp5.T, dv) - 0.5 * np.dot(tmp5.T ,np.dot(dS, tmp5)) )
-
-
+                # d_log_likelihood_update[param,0] = -(0.5*np.sum(np.diag(tmp3)) + \
+                # np.dot(tmp5.T, dv) - 0.5 * np.dot(tmp5.T ,np.dot(dS, tmp5)) )
 
         # Compute the actual updates for mean and variance of the states.
-        m_upd = m_pred + K.dot( v )
+        m_upd = m_pred + K.dot(v)
 
         # Covariance update and ensure it is symmetric
         P_upd = K.dot(S).dot(K.T)
-        P_upd = 0.5*(P_upd + P_upd.T)
-        P_upd =  P_pred - P_upd# this update matrix is symmetric
+        P_upd = 0.5 * (P_upd + P_upd.T)
+        P_upd = P_pred - P_upd  # this update matrix is symmetric
 
-        return m_upd, P_upd, log_likelihood_update, dm_upd, dP_upd, d_log_likelihood_update
+        return (
+            m_upd,
+            P_upd,
+            log_likelihood_update,
+            dm_upd,
+            dP_upd,
+            d_log_likelihood_update,
+        )
 
     @staticmethod
-    def _kalman_update_step_SVD(k, p_m , p_P, p_meas_model_callable, measurement, calc_log_likelihood= False,
-                            calc_grad_log_likelihood=False, p_dm = None, p_dP = None):
+    def _kalman_update_step_SVD(
+        k,
+        p_m,
+        p_P,
+        p_meas_model_callable,
+        measurement,
+        calc_log_likelihood=False,
+        calc_grad_log_likelihood=False,
+        p_dm=None,
+        p_dP=None,
+    ):
         """
         Input:
 
@@ -1700,67 +1979,84 @@ class DescreteStateSpace(object):
 
         """
 
-        #import pdb; pdb.set_trace()
+        # import pdb; pdb.set_trace()
 
-        m_pred = p_m # from prediction step
-        P_pred,S_pred,V_pred = p_P # from prediction step
+        m_pred = p_m  # from prediction step
+        P_pred, S_pred, V_pred = p_P  # from prediction step
 
         H = p_meas_model_callable.Hk(k, m_pred, P_pred)
         R = p_meas_model_callable.Rk(k)
-        R_isr = p_meas_model_callable.R_isrk(k) # square root of the inverse of R matrix
+        R_isr = p_meas_model_callable.R_isrk(
+            k
+        )  # square root of the inverse of R matrix
 
-        time_series_no = p_m.shape[1] # number of time serieses
+        time_series_no = p_m.shape[1]  # number of time serieses
 
-        log_likelihood_update=None; dm_upd=None; dP_upd=None; d_log_likelihood_update=None
+        log_likelihood_update = None
+        dm_upd = None
+        dP_upd = None
+        d_log_likelihood_update = None
         # Update step (only if there is data)
-        #if not np.any(np.isnan(measurement)): # TODO: if some dimensions are missing, do properly computations for other.
-        v = measurement-p_meas_model_callable.f_h(k, m_pred, H)
+        # if not np.any(np.isnan(measurement)): # TODO: if some dimensions are missing, do properly computations for other.
+        v = measurement - p_meas_model_callable.f_h(k, m_pred, H)
 
-        svd_2_matr = np.vstack( ( np.dot( R_isr.T, np.dot(H, V_pred)) , np.diag( 1.0/np.sqrt(S_pred) ) ) )
+        svd_2_matr = np.vstack(
+            (np.dot(R_isr.T, np.dot(H, V_pred)), np.diag(1.0 / np.sqrt(S_pred)))
+        )
 
-        (U,S,Vh) = sp.linalg.svd( svd_2_matr,full_matrices=False, compute_uv=True,
-                     overwrite_a=False,check_finite=True)
+        (U, S, Vh) = sp.linalg.svd(
+            svd_2_matr,
+            full_matrices=False,
+            compute_uv=True,
+            overwrite_a=False,
+            check_finite=True,
+        )
 
-         # P_upd = U_upd S_upd**2 U_upd.T
+        # P_upd = U_upd diag(S_upd) U_upd.T  (S_upd already holds the squared values)
         U_upd = np.dot(V_pred, Vh.T)
-        S_upd = (1.0/S)**2
+        S_upd = (1.0 / S) ** 2
 
-        P_upd = np.dot(U_upd * S_upd, U_upd.T) # update covariance
-        P_upd = (P_upd,S_upd,U_upd) # tuple to pass to the next step
+        P_upd = np.dot(U_upd * S_upd, U_upd.T)  # update covariance
+        P_upd = (P_upd, S_upd, U_upd)  # tuple to pass to the next step
 
-         # stil need to compute S and K for derivative computation
+        # still need to compute S and K for derivative computation
         S = H.dot(P_pred).dot(H.T) + R
-        if measurement.shape[0]==1: # measurements are one dimensional
-            if (S < 0):
-                raise ValueError("Kalman Filter Update: S is negative step %i" % k )
-                 #import pdb; pdb.set_trace()
+        if measurement.shape[0] == 1:  # measurements are one dimensional
+            if S < 0:
+                raise ValueError("Kalman Filter Update: S is negative step %i" % k)
+                # import pdb; pdb.set_trace()
 
             K = P_pred.dot(H.T) / S
             if calc_log_likelihood:
-                log_likelihood_update = -0.5 * ( np.log(2*np.pi) + np.log(S) +
-                                    v*v / S)
-                #log_likelihood_update = log_likelihood_update[0,0] # to make int
-                if np.any(np.isnan(log_likelihood_update)): # some member in P_pred is None.
+                log_likelihood_update = -0.5 * (
+                    np.log(2 * np.pi) + np.log(S) + v * v / S
+                )
+                # log_likelihood_update = log_likelihood_update[0,0] # to make int
+                if np.any(
+                    np.isnan(log_likelihood_update)
+                ):  # some member in P_pred is None.
                     raise ValueError("Nan values in likelihood update!")
-            LL = None; islower = None
+            LL = None
+            islower = None
         else:
-            LL,islower = linalg.cho_factor(S)
-            K = linalg.cho_solve((LL,islower), H.dot(P_pred.T)).T
+            LL, islower = linalg.cho_factor(S)
+            K = linalg.cho_solve((LL, islower), H.dot(P_pred.T)).T
 
             if calc_log_likelihood:
-                log_likelihood_update = -0.5 * ( v.shape[0]*np.log(2*np.pi) +
-                    2*np.sum( np.log(np.diag(LL)) ) +\
-                        np.sum((linalg.cho_solve((LL,islower),v)) * v, axis = 0) ) # diagonal of v.T*S^{-1}*v
-
+                log_likelihood_update = -0.5 * (
+                    v.shape[0] * np.log(2 * np.pi)
+                    + 2 * np.sum(np.log(np.diag(LL)))
+                    + np.sum((linalg.cho_solve((LL, islower), v)) * v, axis=0)
+                )  # diagonal of v.T*S^{-1}*v
 
         # Old  method of computing updated covariance (for testing) ->
-        #P_upd_tst = K.dot(S).dot(K.T)
-        #P_upd_tst = 0.5*(P_upd_tst + P_upd_tst.T)
-        #P_upd_tst =  P_pred - P_upd_tst# this update matrix is symmetric
+        # P_upd_tst = K.dot(S).dot(K.T)
+        # P_upd_tst = 0.5*(P_upd_tst + P_upd_tst.T)
+        # P_upd_tst =  P_pred - P_upd_tst# this update matrix is symmetric
         # Old  method of computing updated covariance (for testing) <-
 
         if calc_grad_log_likelihood:
-            dm_pred_all_params = p_dm # derivativas of the prediction phase
+            dm_pred_all_params = p_dm  # derivativas of the prediction phase
             dP_pred_all_params = p_dP
 
             param_number = p_dP.shape[2]
@@ -1771,67 +2067,88 @@ class DescreteStateSpace(object):
             dm_upd = np.empty(dm_pred_all_params.shape)
             dP_upd = np.empty(dP_pred_all_params.shape)
 
-             # firts dimension parameter_no, second - time series number
-            d_log_likelihood_update = np.empty((param_number,time_series_no))
+            # first dimension - parameter_no, second - time series number
+            d_log_likelihood_update = np.empty((param_number, time_series_no))
             for param in range(param_number):
+                dH = dH_all_params[:, :, param]
+                dR = dR_all_params[:, :, param]
 
-               dH = dH_all_params[:,:,param]
-               dR = dR_all_params[:,:,param]
-
-               dm_pred = dm_pred_all_params[:,:,param]
-               dP_pred = dP_pred_all_params[:,:,param]
+                dm_pred = dm_pred_all_params[:, :, param]
+                dP_pred = dP_pred_all_params[:, :, param]
 
                 # Terms in the likelihood derivatives
-               dv = - np.dot( dH, m_pred) -  np.dot( H, dm_pred)
-               dS = np.dot(dH, np.dot( P_pred, H.T))
-               dS += dS.T
-               dS += np.dot(H, np.dot( dP_pred, H.T)) + dR
+                dv = -np.dot(dH, m_pred) - np.dot(H, dm_pred)
+                dS = np.dot(dH, np.dot(P_pred, H.T))
+                dS += dS.T
+                dS += np.dot(H, np.dot(dP_pred, H.T)) + dR
 
                 # TODO: maybe symmetrize dS
 
-                #dm and dP for the next stem
-               if LL is not None: # the state vector is not a scalar
-                   tmp1 = linalg.cho_solve((LL,islower), H).T
-                   tmp2 = linalg.cho_solve((LL,islower), dH).T
-                   tmp3 = linalg.cho_solve((LL,islower), dS).T
-               else: # the state vector is a scalar
-                   tmp1 = H.T / S
-                   tmp2 = dH.T / S
-                   tmp3 = dS.T / S
+                # dm and dP for the next step
+                if LL is not None:  # the state vector is not a scalar
+                    tmp1 = linalg.cho_solve((LL, islower), H).T
+                    tmp2 = linalg.cho_solve((LL, islower), dH).T
+                    tmp3 = linalg.cho_solve((LL, islower), dS).T
+                else:  # the state vector is a scalar
+                    tmp1 = H.T / S
+                    tmp2 = dH.T / S
+                    tmp3 = dS.T / S
 
-               dK = np.dot( dP_pred, tmp1) + np.dot( P_pred, tmp2) - \
-                    np.dot( P_pred, np.dot( tmp1, tmp3 ) )
+                dK = (
+                    np.dot(dP_pred, tmp1)
+                    + np.dot(P_pred, tmp2)
+                    - np.dot(P_pred, np.dot(tmp1, tmp3))
+                )
 
-               # terms required for the next step, save this for each parameter
-               dm_upd[:,:,param] = dm_pred + np.dot(dK, v) + np.dot(K, dv)
+                # terms required for the next step, save this for each parameter
+                dm_upd[:, :, param] = dm_pred + np.dot(dK, v) + np.dot(K, dv)
 
-               dP_upd[:,:,param] = -np.dot(dK, np.dot(S, K.T))
-               dP_upd[:,:,param] += dP_upd[:,:,param].T
-               dP_upd[:,:,param] += dP_pred - np.dot(K , np.dot( dS, K.T))
+                dP_upd[:, :, param] = -np.dot(dK, np.dot(S, K.T))
+                dP_upd[:, :, param] += dP_upd[:, :, param].T
+                dP_upd[:, :, param] += dP_pred - np.dot(K, np.dot(dS, K.T))
 
-               dP_upd[:,:,param] = 0.5*(dP_upd[:,:,param] + dP_upd[:,:,param].T) #symmetrize
-               # computing the likelihood change for each parameter:
-               if LL is not None: # the state vector is not 1D
-                   tmp5 = linalg.cho_solve((LL,islower), v)
-               else: # the state vector is a scalar
-                   tmp5 = v / S
+                dP_upd[:, :, param] = 0.5 * (
+                    dP_upd[:, :, param] + dP_upd[:, :, param].T
+                )  # symmetrize
+                # computing the likelihood change for each parameter:
+                if LL is not None:  # the state vector is not 1D
+                    tmp5 = linalg.cho_solve((LL, islower), v)
+                else:  # the state vector is a scalar
+                    tmp5 = v / S
 
-
-               d_log_likelihood_update[param,:] = -(0.5*np.sum(np.diag(tmp3)) + \
-                   np.sum(tmp5*dv, axis=0) - 0.5 * np.sum(tmp5 * np.dot(dS, tmp5), axis=0) )
+                d_log_likelihood_update[param, :] = -(
+                    0.5 * np.sum(np.diag(tmp3))
+                    + np.sum(tmp5 * dv, axis=0)
+                    - 0.5 * np.sum(tmp5 * np.dot(dS, tmp5), axis=0)
+                )
                 # Before
-                #d_log_likelihood_update[param,0] = -(0.5*np.sum(np.diag(tmp3)) + \
-                #np.dot(tmp5.T, dv) - 0.5 * np.dot(tmp5.T ,np.dot(dS, tmp5)) )
+                # d_log_likelihood_update[param,0] = -(0.5*np.sum(np.diag(tmp3)) + \
+                # np.dot(tmp5.T, dv) - 0.5 * np.dot(tmp5.T ,np.dot(dS, tmp5)) )
 
         # Compute the actual updates for mean of the states. Variance update
         # is computed earlier.
-        m_upd = m_pred + K.dot( v )
+        m_upd = m_pred + K.dot(v)
 
-        return m_upd, P_upd, log_likelihood_update, dm_upd, dP_upd, d_log_likelihood_update
+        return (
+            m_upd,
+            P_upd,
+            log_likelihood_update,
+            dm_upd,
+            dP_upd,
+            d_log_likelihood_update,
+        )
 
     @staticmethod
-    def _rts_smoother_update_step(k, p_m , p_P, p_m_pred, p_P_pred, p_m_prev_step,
-                                  p_P_prev_step, p_dynamic_callables):
+    def _rts_smoother_update_step(
+        k,
+        p_m,
+        p_P,
+        p_m_pred,
+        p_P_pred,
+        p_m_prev_step,
+        p_P_prev_step,
+        p_dynamic_callables,
+    ):
         """
         Rauch–Tung–Striebel(RTS) update step
 
@@ -1867,31 +2184,30 @@ class DescreteStateSpace(object):
 
         """
 
-        A = p_dynamic_callables.Ak(k,p_m,p_P) # state transition matrix (or Jacobian)
+        A = p_dynamic_callables.Ak(k, p_m, p_P)  # state transition matrix (or Jacobian)
 
-        tmp = np.dot( A, p_P.T)
-        if A.shape[0] == 1: # 1D states
-            G = tmp.T / p_P_pred # P[:,:,k] is symmetric
+        tmp = np.dot(A, p_P.T)
+        if A.shape[0] == 1:  # 1D states
+            G = tmp.T / p_P_pred  # P[:,:,k] is symmetric
         else:
             try:
-                LL,islower = linalg.cho_factor(p_P_pred)
-                G = linalg.cho_solve((LL,islower),tmp).T
+                LL, islower = linalg.cho_factor(p_P_pred)
+                G = linalg.cho_solve((LL, islower), tmp).T
             except:
                 # It happende that p_P_pred has several near zero eigenvalues
                 # hence the Cholesky method does not work.
                 res = sp.linalg.lstsq(p_P_pred, tmp)
                 G = res[0].T
 
-        m_upd = p_m + G.dot( p_m_prev_step-p_m_pred )
-        P_upd = p_P + G.dot( p_P_prev_step-p_P_pred).dot(G.T)
+        m_upd = p_m + G.dot(p_m_prev_step - p_m_pred)
+        P_upd = p_P + G.dot(p_P_prev_step - p_P_pred).dot(G.T)
 
-        P_upd = 0.5*(P_upd + P_upd.T)
+        P_upd = 0.5 * (P_upd + P_upd.T)
 
         return m_upd, P_upd, G
 
     @classmethod
-    def rts_smoother(cls,state_dim, p_dynamic_callables, filter_means,
-                          filter_covars):
+    def rts_smoother(cls, state_dim, p_dynamic_callables, filter_means, filter_covars):
         """
         This function implements Rauch–Tung–Striebel(RTS) smoother algorithm
         based on the results of kalman_filter_raw.
@@ -1934,41 +2250,69 @@ class DescreteStateSpace(object):
             Smoothed estimates of the state covariances
         """
 
-        no_steps = filter_covars.shape[0]-1# number of steps (minus initial covariance)
+        no_steps = (
+            filter_covars.shape[0] - 1
+        )  # number of steps (minus initial covariance)
 
-        M = np.empty(filter_means.shape) # smoothed means
-        P = np.empty(filter_covars.shape) # smoothed covars
-        #G = np.empty( (no_steps,state_dim,state_dim)  ) # G from the update step of the smoother
+        M = np.empty(filter_means.shape)  # smoothed means
+        P = np.empty(filter_covars.shape)  # smoothed covars
+        # G = np.empty( (no_steps,state_dim,state_dim)  ) # G from the update step of the smoother
 
-        M[-1,:] = filter_means[-1,:]
-        P[-1,:,:] = filter_covars[-1,:,:]
-        for k in range(no_steps-1,-1,-1):
+        M[-1, :] = filter_means[-1, :]
+        P[-1, :, :] = filter_covars[-1, :, :]
+        for k in range(no_steps - 1, -1, -1):
+            m_pred, P_pred, tmp1, tmp2 = cls._kalman_prediction_step(
+                k,
+                filter_means[k, :],
+                filter_covars[k, :, :],
+                p_dynamic_callables,
+                calc_grad_log_likelihood=False,
+            )
+            p_m = filter_means[k, :]
+            if len(p_m.shape) < 2:
+                p_m.shape = (p_m.shape[0], 1)
 
-            m_pred, P_pred, tmp1, tmp2 = \
-                    cls._kalman_prediction_step(k, filter_means[k,:],
-                                                filter_covars[k,:,:], p_dynamic_callables,
-                                                calc_grad_log_likelihood=False)
-            p_m = filter_means[k,:]
-            if len(p_m.shape)<2:
-                p_m.shape = (p_m.shape[0],1)
+            p_m_prev_step = M[k + 1, :]
+            if len(p_m_prev_step.shape) < 2:
+                p_m_prev_step.shape = (p_m_prev_step.shape[0], 1)
 
-            p_m_prev_step = M[k+1,:]
-            if len(p_m_prev_step.shape)<2:
-                p_m_prev_step.shape = (p_m_prev_step.shape[0],1)
+            m_upd, P_upd, G_tmp = cls._rts_smoother_update_step(
+                k,
+                p_m,
+                filter_covars[k, :, :],
+                m_pred,
+                P_pred,
+                p_m_prev_step,
+                P[k + 1, :, :],
+                p_dynamic_callables,
+            )
 
-            m_upd, P_upd, G_tmp = cls._rts_smoother_update_step(k,
-                            p_m ,filter_covars[k,:,:],
-                            m_pred, P_pred, p_m_prev_step ,P[k+1,:,:], p_dynamic_callables)
-
-            M[k,:] = m_upd#np.squeeze(m_upd)
-            P[k,:,:] = P_upd
-            #G[k,:,:] = G_upd.T # store transposed G.
+            M[k, :] = m_upd  # np.squeeze(m_upd)
+            P[k, :, :] = P_upd
+            # G[k,:,:] = G_upd.T # store transposed G.
         # Return values
 
-        return (M, P) #, G)
+        return (M, P)  # , G)
 
     @staticmethod
-    def _EM_gradient(A,Q,H,R,m_init,P_init,measurements, M, P, G, dA, dQ, dH, dR, dm_init, dP_init):
+    def _EM_gradient(
+        A,
+        Q,
+        H,
+        R,
+        m_init,
+        P_init,
+        measurements,
+        M,
+        P,
+        G,
+        dA,
+        dQ,
+        dH,
+        dR,
+        dm_init,
+        dP_init,
+    ):
         """
         Gradient computation with the EM algorithm.
 
@@ -1979,35 +2323,37 @@ class DescreteStateSpace(object):
         P: Variances from the smoother
         G: Gains? from the smoother
         """
-        import pdb; pdb.set_trace();
+        import pdb
+
+        pdb.set_trace()
 
         param_number = dA.shape[-1]
-        d_log_likelihood_update = np.empty((param_number,1))
+        d_log_likelihood_update = np.empty((param_number, 1))
 
         sample_no = measurements.shape[0]
-        P_1 = P[1:,:,:] # remove 0-th step
-        P_2 = P[0:-1,:,:] # remove 0-th step
+        P_1 = P[1:, :, :]  # remove 0-th step
+        P_2 = P[0:-1, :, :]  # remove 0-th step
 
-        M_1 = M[1:,:] # remove 0-th step
-        M_2 = M[0:-1,:] # remove the last step
+        M_1 = M[1:, :]  # remove 0-th step
+        M_2 = M[0:-1, :]  # remove the last step
 
-        Sigma = np.mean(P_1,axis=0) + np.dot(M_1.T, M_1) / sample_no #
-        Phi =   np.mean(P_2,axis=0) + np.dot(M_2.T, M_2) / sample_no #
+        Sigma = np.mean(P_1, axis=0) + np.dot(M_1.T, M_1) / sample_no  #
+        Phi = np.mean(P_2, axis=0) + np.dot(M_2.T, M_2) / sample_no  #
 
-        B = np.dot( measurements.T, M_1 )/ sample_no
-        C =   (sp.einsum( 'ijk,ikl', P_1, G) + np.dot(M_1.T, M_2)) / sample_no #
+        B = np.dot(measurements.T, M_1) / sample_no
+        C = (sp.einsum("ijk,ikl", P_1, G) + np.dot(M_1.T, M_2)) / sample_no  #
 
-#        C1 = np.zeros( (P_1.shape[1],P_1.shape[1]) )
-#        for k in range(P_1.shape[0]):
-#            C1 += np.dot(P_1[k,:,:],G[k,:,:]) + sp.outer( M_1[k,:], M_2[k,:] )
-#        C1 = C1 / sample_no
+        #        C1 = np.zeros( (P_1.shape[1],P_1.shape[1]) )
+        #        for k in range(P_1.shape[0]):
+        #            C1 += np.dot(P_1[k,:,:],G[k,:,:]) + sp.outer( M_1[k,:], M_2[k,:] )
+        #        C1 = C1 / sample_no
 
-        D = np.dot( measurements.T, measurements ) / sample_no
+        D = np.dot(measurements.T, measurements) / sample_no
 
         try:
             P_init_inv = sp.linalg.inv(P_init)
 
-            if np.max( np.abs(P_init_inv)) > 10e13:
+            if np.max(np.abs(P_init_inv)) > 10e13:
                 compute_P_init_terms = False
             else:
                 compute_P_init_terms = True
@@ -2017,7 +2363,7 @@ class DescreteStateSpace(object):
         try:
             Q_inv = sp.linalg.inv(Q)
 
-            if np.max( np.abs(Q_inv)) > 10e13:
+            if np.max(np.abs(Q_inv)) > 10e13:
                 compute_Q_terms = False
             else:
                 compute_Q_terms = True
@@ -2027,54 +2373,84 @@ class DescreteStateSpace(object):
         try:
             R_inv = sp.linalg.inv(R)
 
-            if np.max( np.abs(R_inv)) > 10e13:
+            if np.max(np.abs(R_inv)) > 10e13:
                 compute_R_terms = False
             else:
                 compute_R_terms = True
         except np.linalg.LinAlgError:
             compute_R_terms = False
 
-
-        d_log_likelihood_update = np.zeros((param_number,1))
+        d_log_likelihood_update = np.zeros((param_number, 1))
         for j in range(param_number):
             if compute_P_init_terms:
-                d_log_likelihood_update[j,:] -= 0.5 * np.sum(P_init_inv* dP_init[:,:,j].T ) #p #m
+                d_log_likelihood_update[j, :] -= 0.5 * np.sum(
+                    P_init_inv * dP_init[:, :, j].T
+                )  # p #m
 
-                M0_smoothed = M[0]; M0_smoothed.shape = (M0_smoothed.shape[0],1)
-                tmp1 = np.dot( dP_init[:,:,j], np.dot( P_init_inv, (P[0,:,:] + sp.outer( (M0_smoothed - m_init), (M0_smoothed - m_init) )) )  ) #p #m
-                d_log_likelihood_update[j,:] += 0.5 * np.sum(P_init_inv* tmp1.T )
+                M0_smoothed = M[0]
+                M0_smoothed.shape = (M0_smoothed.shape[0], 1)
+                tmp1 = np.dot(
+                    dP_init[:, :, j],
+                    np.dot(
+                        P_init_inv,
+                        (
+                            P[0, :, :]
+                            + sp.outer((M0_smoothed - m_init), (M0_smoothed - m_init))
+                        ),
+                    ),
+                )  # p #m
+                d_log_likelihood_update[j, :] += 0.5 * np.sum(P_init_inv * tmp1.T)
 
-                tmp2 = sp.outer( dm_init[:,j], M0_smoothed )
+                tmp2 = sp.outer(dm_init[:, j], M0_smoothed)
                 tmp2 += tmp2.T
-                d_log_likelihood_update[j,:] += 0.5 * np.sum(P_init_inv* tmp2.T )
+                d_log_likelihood_update[j, :] += 0.5 * np.sum(P_init_inv * tmp2.T)
 
             if compute_Q_terms:
+                d_log_likelihood_update[j, :] -= (
+                    sample_no / 2.0 * np.sum(Q_inv * dQ[:, :, j].T)
+                )  # m
 
-                d_log_likelihood_update[j,:] -=  sample_no/2.0 * np.sum(Q_inv* dQ[:,:,j].T ) #m
+                tmp1 = np.dot(C, A.T)
+                tmp1 += tmp1.T
+                tmp1 = Sigma - tmp1 + np.dot(A, np.dot(Phi, A.T))  # m
+                tmp1 = np.dot(dQ[:, :, j], np.dot(Q_inv, tmp1))
+                d_log_likelihood_update[j, :] += (
+                    sample_no / 2.0 * np.sum(Q_inv * tmp1.T)
+                )
 
-                tmp1 = np.dot(C,A.T); tmp1 += tmp1.T; tmp1 = Sigma - tmp1 + np.dot(A, np.dot(Phi,A.T)) #m
-                tmp1 = np.dot( dQ[:,:,j], np.dot( Q_inv, tmp1) )
-                d_log_likelihood_update[j,:] += sample_no/2.0 * np.sum(Q_inv * tmp1.T)
-
-                tmp2 = np.dot( dA[:,:,j], C.T); tmp2 += tmp2.T;
-                tmp3 = np.dot(dA[:,:,j], np.dot(Phi,A.T)); tmp3 += tmp3.T
-                d_log_likelihood_update[j,:] -= sample_no/2.0 * np.sum(Q_inv.T * (tmp3 - tmp2) )
+                tmp2 = np.dot(dA[:, :, j], C.T)
+                tmp2 += tmp2.T
+                tmp3 = np.dot(dA[:, :, j], np.dot(Phi, A.T))
+                tmp3 += tmp3.T
+                d_log_likelihood_update[j, :] -= (
+                    sample_no / 2.0 * np.sum(Q_inv.T * (tmp3 - tmp2))
+                )
 
             if compute_R_terms:
-                d_log_likelihood_update[j,:] -=  sample_no/2.0 * np.sum(R_inv* dR[:,:,j].T )
+                d_log_likelihood_update[j, :] -= (
+                    sample_no / 2.0 * np.sum(R_inv * dR[:, :, j].T)
+                )
 
-                tmp1 = np.dot(B,H.T); tmp1 += tmp1.T; tmp1 = D - tmp1 + np.dot(H, np.dot(Sigma,H.T))
-                tmp1 = np.dot( dR[:,:,j], np.dot( R_inv, tmp1) )
-                d_log_likelihood_update[j,:] += sample_no/2.0 * np.sum(R_inv * tmp1.T)
+                tmp1 = np.dot(B, H.T)
+                tmp1 += tmp1.T
+                tmp1 = D - tmp1 + np.dot(H, np.dot(Sigma, H.T))
+                tmp1 = np.dot(dR[:, :, j], np.dot(R_inv, tmp1))
+                d_log_likelihood_update[j, :] += (
+                    sample_no / 2.0 * np.sum(R_inv * tmp1.T)
+                )
 
-                tmp2 = np.dot( dH[:,:,j], B.T); tmp2 += tmp2.T;
-                tmp3 = np.dot(dH[:,:,j], np.dot(Sigma,H.T)); tmp3 += tmp3.T
-                d_log_likelihood_update[j,:] -= sample_no/2.0 * np.sum(R_inv.T * (tmp3 - tmp2) )
+                tmp2 = np.dot(dH[:, :, j], B.T)
+                tmp2 += tmp2.T
+                tmp3 = np.dot(dH[:, :, j], np.dot(Sigma, H.T))
+                tmp3 += tmp3.T
+                d_log_likelihood_update[j, :] -= (
+                    sample_no / 2.0 * np.sum(R_inv.T * (tmp3 - tmp2))
+                )
 
         return d_log_likelihood_update
 
     @staticmethod
-    def _check_SS_matrix(p_M, state_dim, measurement_dim, which='A'):
+    def _check_SS_matrix(p_M, state_dim, measurement_dim, which="A"):
         """
         Veryfy that on exit the matrix has appropriate shape for the KF algorithm.
 
@@ -2096,30 +2472,42 @@ class DescreteStateSpace(object):
         """
 
         old_M_shape = None
-        if len(p_M.shape) < 3: # new shape is 3 dimensional
-            old_M_shape = p_M.shape # save shape to restore it on exit
-            if len(p_M.shape) == 2: # matrix
-                p_M.shape = (p_M.shape[0],p_M.shape[1],1)
-            elif len(p_M.shape) == 1: # scalar but in array already
-                if (p_M.shape[0] != 1):
-                    raise ValueError("Matrix %s is an 1D array, while it must be a matrix or scalar", which)
+        if len(p_M.shape) < 3:  # new shape is 3 dimensional
+            old_M_shape = p_M.shape  # save shape to restore it on exit
+            if len(p_M.shape) == 2:  # matrix
+                p_M.shape = (p_M.shape[0], p_M.shape[1], 1)
+            elif len(p_M.shape) == 1:  # scalar but in array already
+                if p_M.shape[0] != 1:
+                    raise ValueError(
+                        "Matrix %s is an 1D array, while it must be a matrix or scalar"
+                        % which  # ValueError does not %-format its arguments itself
+                    )
                 else:
-                    p_M.shape = (1,1,1)
+                    p_M.shape = (1, 1, 1)
 
-        if (which == 'A') or (which == 'Q'):
+        if (which == "A") or (which == "Q"):
             if (p_M.shape[0] != state_dim) or (p_M.shape[1] != state_dim):
-                raise ValueError("%s must be a square matrix of size (%i,%i)" % (which, state_dim, state_dim))
-        if (which == 'H'):
+                raise ValueError(
+                    "%s must be a square matrix of size (%i,%i)"
+                    % (which, state_dim, state_dim)
+                )
+        if which == "H":
             if (p_M.shape[0] != measurement_dim) or (p_M.shape[1] != state_dim):
-                raise ValueError("H must be of shape (measurement_dim, state_dim) (%i,%i)" % (measurement_dim, state_dim))
-        if (which == 'R'):
+                raise ValueError(
+                    "H must be of shape (measurement_dim, state_dim) (%i,%i)"
+                    % (measurement_dim, state_dim)
+                )
+        if which == "R":
             if (p_M.shape[0] != measurement_dim) or (p_M.shape[1] != measurement_dim):
-                raise ValueError("R must be of shape (measurement_dim, measurement_dim) (%i,%i)" % (measurement_dim, measurement_dim))
+                raise ValueError(
+                    "R must be of shape (measurement_dim, measurement_dim) (%i,%i)"
+                    % (measurement_dim, measurement_dim)
+                )
 
-        return (p_M,old_M_shape)
+        return (p_M, old_M_shape)
 
     @staticmethod
-    def _check_grad_state_matrices(dM, state_dim, grad_params_no, which = 'dA'):
+    def _check_grad_state_matrices(dM, state_dim, grad_params_no, which="dA"):
         """
         Function checks (mostly check dimensions) matrices for marginal likelihood
         gradient parameters calculation. It check dA, dQ matrices.
@@ -2147,32 +2535,34 @@ class DescreteStateSpace(object):
 
         """
 
-
         if dM is None:
-            dM=np.zeros((state_dim,state_dim,grad_params_no))
+            dM = np.zeros((state_dim, state_dim, grad_params_no))
         elif isinstance(dM, np.ndarray):
             if state_dim == 1:
                 if len(dM.shape) < 3:
-                    dM.shape = (1,1,1)
+                    dM.shape = (1, 1, 1)
             else:
                 if len(dM.shape) < 3:
-                    dM.shape = (state_dim,state_dim,1)
-        elif isinstance(dM, np.int):
+                    dM.shape = (state_dim, state_dim, 1)
+        elif isinstance(dM, int):
             if state_dim > 1:
-                raise ValueError("When computing likelihood gradient wrong %s dimension." % which)
+                raise ValueError(
+                    "When computing likelihood gradient wrong %s dimension." % which
+                )
             else:
-                dM = np.ones((1,1,1)) * dM
+                dM = np.ones((1, 1, 1)) * dM
 
-#        if not isinstance(dM, types.FunctionType):
-#            f_dM = lambda k: dM
-#        else:
-#            f_dM = dM
+        #        if not isinstance(dM, types.FunctionType):
+        #            f_dM = lambda k: dM
+        #        else:
+        #            f_dM = dM
 
         return dM
 
-
     @staticmethod
-    def _check_grad_measurement_matrices(dM, state_dim, grad_params_no, measurement_dim, which = 'dH'):
+    def _check_grad_measurement_matrices(
+        dM, state_dim, grad_params_no, measurement_dim, which="dH"
+    ):
         """
         Function checks (mostly check dimensions) matrices for marginal likelihood
         gradient parameters calculation. It check dH, dR matrices.
@@ -2206,38 +2596,40 @@ class DescreteStateSpace(object):
         """
 
         if dM is None:
-            if which == 'dH':
-                dM=np.zeros((measurement_dim ,state_dim,grad_params_no))
-            elif  which == 'dR':
-                dM=np.zeros((measurement_dim,measurement_dim,grad_params_no))
+            if which == "dH":
+                dM = np.zeros((measurement_dim, state_dim, grad_params_no))
+            elif which == "dR":
+                dM = np.zeros((measurement_dim, measurement_dim, grad_params_no))
         elif isinstance(dM, np.ndarray):
             if state_dim == 1:
                 if len(dM.shape) < 3:
-                    dM.shape = (1,1,1)
+                    dM.shape = (1, 1, 1)
             else:
                 if len(dM.shape) < 3:
-                     if which == 'dH':
-                        dM.shape = (measurement_dim,state_dim,1)
-                     elif  which == 'dR':
-                        dM.shape = (measurement_dim,measurement_dim,1)
-        elif isinstance(dM, np.int):
+                    if which == "dH":
+                        dM.shape = (measurement_dim, state_dim, 1)
+                    elif which == "dR":
+                        dM.shape = (measurement_dim, measurement_dim, 1)
+        elif isinstance(dM, int):
             if state_dim > 1:
-                raise ValueError("When computing likelihood gradient wrong dH dimension.")
+                raise ValueError(
+                    "When computing likelihood gradient wrong %s dimension." % which
+                )
             else:
-                dM = np.ones((1,1,1)) * dM
+                dM = np.ones((1, 1, 1)) * dM
 
-#        if not isinstance(dM, types.FunctionType):
-#            f_dM = lambda k: dM
-#        else:
-#            f_dM = dM
+        #        if not isinstance(dM, types.FunctionType):
+        #            f_dM = lambda k: dM
+        #        else:
+        #            f_dM = dM
 
         return dM
 
 
-
 class Struct(object):
     pass
 
+
 class ContDescrStateSpace(DescreteStateSpace):
     """
     Class for continuous-discrete Kalman filter. State equation is
@@ -2261,7 +2653,19 @@ class ContDescrStateSpace(DescreteStateSpace):
         would take too much memory.
         """
 
-        def __init__(self, F,L,Qc,dt,compute_derivatives=False, grad_params_no=None, P_inf=None, dP_inf=None, dF = None, dQc=None):
+        def __init__(
+            self,
+            F,
+            L,
+            Qc,
+            dt,
+            compute_derivatives=False,
+            grad_params_no=None,
+            P_inf=None,
+            dP_inf=None,
+            dF=None,
+            dQc=None,
+        ):
             """
             Constructor. All necessary parameters are passed here and stored
             in the opject.
@@ -2288,7 +2692,7 @@ class ContDescrStateSpace(DescreteStateSpace):
             self.L = L.copy()
             self.Qc = Qc.copy()
 
-            self.dt = dt # copy is not taken because dt is internal parameter
+            self.dt = dt  # copy is not taken because dt is internal parameter
 
             # Parameters are used to calculate derivatives but derivatives
             # are not used in the smoother. Therefore copies are not taken.
@@ -2298,8 +2702,7 @@ class ContDescrStateSpace(DescreteStateSpace):
             self.dQc = dQc
 
             self.compute_derivatives = compute_derivatives
-            self.grad_params_no =  grad_params_no
-
+            self.grad_params_no = grad_params_no
 
             self.last_k = 0
             self.last_k_computed = False
@@ -2313,14 +2716,14 @@ class ContDescrStateSpace(DescreteStateSpace):
             self.Q_svd_computed = False
             # !!!Print statistics! Which object is created
 
-        def f_a(self, k,m,A):
+        def f_a(self, k, m, A):
             """
             Dynamic model
             """
 
-            return np.dot(A, m) # default dynamic model
+            return np.dot(A, m)  # default dynamic model
 
-        def _recompute_for_new_k(self,k):
+        def _recompute_for_new_k(self, k):
             """
             Computes the necessary matrices for an index k and store the results.
 
@@ -2335,9 +2738,18 @@ class ContDescrStateSpace(DescreteStateSpace):
                     A, Q, dA dQ on step k
             """
             if (self.last_k != k) or (self.last_k_computed == False):
-                v_Ak,v_Qk, tmp, v_dAk, v_dQk = ContDescrStateSpace.lti_sde_to_descrete(self.F,
-                        self.L,self.Qc,self.dt[k],self.compute_derivatives,
-                        grad_params_no=self.grad_params_no, P_inf=self.P_inf, dP_inf=self.dP_inf, dF=self.dF, dQc=self.dQc)
+                v_Ak, v_Qk, tmp, v_dAk, v_dQk = ContDescrStateSpace.lti_sde_to_descrete(
+                    self.F,
+                    self.L,
+                    self.Qc,
+                    self.dt[k],
+                    self.compute_derivatives,
+                    grad_params_no=self.grad_params_no,
+                    P_inf=self.P_inf,
+                    dP_inf=self.dP_inf,
+                    dF=self.dF,
+                    dQc=self.dQc,
+                )
 
                 self.last_k = k
                 self.last_k_computed = True
@@ -2345,7 +2757,7 @@ class ContDescrStateSpace(DescreteStateSpace):
                 self.v_Qk = v_Qk
                 self.v_dAk = v_dAk
                 self.v_dQk = v_dQk
-                
+
                 self.Q_square_root_computed = False
                 self.Q_inverse_computed = False
                 self.Q_svd_computed = False
@@ -2357,7 +2769,7 @@ class ContDescrStateSpace(DescreteStateSpace):
 
             # !!!Print statistics! Print sizes of matrices
 
-            return v_Ak,v_Qk, v_dAk, v_dQk
+            return v_Ak, v_Qk, v_dAk, v_dQk
 
         def reset(self, compute_derivatives):
             """
@@ -2370,44 +2782,50 @@ class ContDescrStateSpace(DescreteStateSpace):
             self.last_k = 0
             self.last_k_computed = False
             self.compute_derivatives = compute_derivatives
-            
+
             self.Q_square_root_computed = False
             self.Q_inverse_computed = False
             self.Q_svd_computed = False
             self.Q_eigen_computed = False
             return self
 
-        def Ak(self,k,m,P):
-            v_Ak,v_Qk, v_dAk, v_dQk = self._recompute_for_new_k(k)
+        def Ak(self, k, m, P):
+            v_Ak, v_Qk, v_dAk, v_dQk = self._recompute_for_new_k(k)
             return v_Ak
 
-        def Qk(self,k):
-            v_Ak,v_Qk, v_dAk, v_dQk = self._recompute_for_new_k(k)
+        def Qk(self, k):
+            v_Ak, v_Qk, v_dAk, v_dQk = self._recompute_for_new_k(k)
             return v_Qk
 
         def dAk(self, k):
-            v_Ak,v_Qk, v_dAk, v_dQk = self._recompute_for_new_k(k)
+            v_Ak, v_Qk, v_dAk, v_dQk = self._recompute_for_new_k(k)
             return v_dAk
 
         def dQk(self, k):
-            v_Ak,v_Qk, v_dAk, v_dQk = self._recompute_for_new_k(k)
+            v_Ak, v_Qk, v_dAk, v_dQk = self._recompute_for_new_k(k)
             return v_dQk
 
-        def Q_srk(self,k):
+        def Q_srk(self, k):
             """
             Check square root, maybe rewriting for Spectral decomposition is needed.
             Square root of the noise matrix Q
             """
 
-            if ((self.last_k == k) and (self.last_k_computed == True)):
+            if (self.last_k == k) and (self.last_k_computed == True):
                 if not self.Q_square_root_computed:
                     if not self.Q_svd_computed:
-                        (U, S, Vh) = sp.linalg.svd( self.v_Qk, full_matrices=False, compute_uv=True, overwrite_a=False, check_finite=False)
+                        (U, S, Vh) = sp.linalg.svd(
+                            self.v_Qk,
+                            full_matrices=False,
+                            compute_uv=True,
+                            overwrite_a=False,
+                            check_finite=False,
+                        )
                         self.Q_svd = (U, S, Vh)
                         self.Q_svd_computed = True
                     else:
                         (U, S, Vh) = self.Q_svd
-                        
+
                     square_root = U * np.sqrt(S)
                     self.square_root_computed = True
                     self.Q_square_root = square_root
@@ -2417,56 +2835,70 @@ class ContDescrStateSpace(DescreteStateSpace):
                 raise ValueError("Square root of Q can not be computed")
 
             return square_root
-        
-        def Q_inverse(self, k, p_largest_cond_num, p_regularization_type):        
+
+        def Q_inverse(self, k, p_largest_cond_num, p_regularization_type):
             """
             Function inverts Q matrix and regularizes the inverse.
             Regularization is useful when original matrix is badly conditioned.
             Function is currently used only in SparseGP code.
-            
+
             Inputs:
             ------------------------------
             k: int
             Iteration number.
-            
+
             p_largest_cond_num: float
             Largest condition value for the inverted matrix. If cond. number is smaller than that
             no regularization happen.
-            
+
             regularization_type: 1 or 2
             Regularization type.
-            
+
             regularization_type: int (1 or 2)
-            
+
                 type 1: 1/(S[k] + regularizer) regularizer is computed
                 type 2: S[k]/(S^2[k] + regularizer) regularizer is computed
             """
-            
-            #import pdb; pdb.set_trace()
-                    
-            if ((self.last_k == k) and (self.last_k_computed == True)):
+
+            # import pdb; pdb.set_trace()
+
+            if (self.last_k == k) and (self.last_k_computed == True):
                 if not self.Q_inverse_computed:
                     if not self.Q_svd_computed:
-                        (U, S, Vh) = sp.linalg.svd( self.v_Qk, full_matrices=False, compute_uv=True, overwrite_a=False, check_finite=False)
+                        (U, S, Vh) = sp.linalg.svd(
+                            self.v_Qk,
+                            full_matrices=False,
+                            compute_uv=True,
+                            overwrite_a=False,
+                            check_finite=False,
+                        )
                         self.Q_svd = (U, S, Vh)
                         self.Q_svd_computed = True
                     else:
                         (U, S, Vh) = self.Q_svd
 
-                    Q_inverse_r = psd_matrix_inverse(k, 0.5*(self.v_Qk + self.v_Qk.T), U,S, p_largest_cond_num, p_regularization_type)
-                    
+                    Q_inverse_r = psd_matrix_inverse(
+                        k,
+                        0.5 * (self.v_Qk + self.v_Qk.T),
+                        U,
+                        S,
+                        p_largest_cond_num,
+                        p_regularization_type,
+                    )
+
                     self.Q_inverse_computed = True
                     self.Q_inverse_r = Q_inverse_r
-                        
+
                 else:
                     Q_inverse_r = self.Q_inverse_r
             else:
-                raise ValueError("""Inverse of Q can not be computed, because Q has not been computed.
-                                     This requires some programming""")
+                raise ValueError(
+                    """Inverse of Q can not be computed, because Q has not been computed.
+                                     This requires some programming"""
+                )
 
             return Q_inverse_r
-        
-        
+
         def return_last(self):
             """
             Function returns last computed matrices.
@@ -2497,7 +2929,20 @@ class ContDescrStateSpace(DescreteStateSpace):
         Since all the matrices are computed all together, this object can be used
         in smoother without repeating the computations.
         """
-        def __init__(self, F,L,Qc,dt,compute_derivatives=False, grad_params_no=None, P_inf=None, dP_inf=None, dF = None, dQc=None):
+
+        def __init__(
+            self,
+            F,
+            L,
+            Qc,
+            dt,
+            compute_derivatives=False,
+            grad_params_no=None,
+            P_inf=None,
+            dP_inf=None,
+            dF=None,
+            dQc=None,
+        ):
             """
             Constructor. All necessary parameters are passed here and stored
             in the opject.
@@ -2518,33 +2963,55 @@ class ContDescrStateSpace(DescreteStateSpace):
             -------------------
             Nothing
             """
-            As, Qs, reconstruct_indices, dAs, dQs = ContDescrStateSpace.lti_sde_to_descrete(F,
-                        L,Qc,dt,compute_derivatives,
-                        grad_params_no=grad_params_no, P_inf=P_inf, dP_inf=dP_inf, dF=dF, dQc=dQc)
+            (
+                As,
+                Qs,
+                reconstruct_indices,
+                dAs,
+                dQs,
+            ) = ContDescrStateSpace.lti_sde_to_descrete(
+                F,
+                L,
+                Qc,
+                dt,
+                compute_derivatives,
+                grad_params_no=grad_params_no,
+                P_inf=P_inf,
+                dP_inf=dP_inf,
+                dF=dF,
+                dQc=dQc,
+            )
 
             self.As = As
             self.Qs = Qs
             self.dAs = dAs
             self.dQs = dQs
             self.reconstruct_indices = reconstruct_indices
-            self.total_size_of_data = self.As.nbytes + self.Qs.nbytes +\
-                            (self.dAs.nbytes if (self.dAs is not None) else 0) +\
-                            (self.dQs.nbytes if (self.dQs is not None) else 0) +\
-                            (self.reconstruct_indices.nbytes if (self.reconstruct_indices is not None) else 0)
+            self.total_size_of_data = (
+                self.As.nbytes
+                + self.Qs.nbytes
+                + (self.dAs.nbytes if (self.dAs is not None) else 0)
+                + (self.dQs.nbytes if (self.dQs is not None) else 0)
+                + (
+                    self.reconstruct_indices.nbytes
+                    if (self.reconstruct_indices is not None)
+                    else 0
+                )
+            )
 
             self.Q_svd_dict = {}
             self.Q_square_root_dict = {}
             self.Q_inverse_dict = {}
-            
+
             self.last_k = None
-             # !!!Print statistics! Which object is created
+            # !!!Print statistics! Which object is created
             # !!!Print statistics! Print sizes of matrices
 
-        def f_a(self, k,m,A):
+        def f_a(self, k, m, A):
             """
             Dynamic model
             """
-            return np.dot(A, m) # default dynamic model
+            return np.dot(A, m)  # default dynamic model
 
         def reset(self, compute_derivatives=False):
             """
@@ -2554,24 +3021,23 @@ class ContDescrStateSpace(DescreteStateSpace):
             """
             return self
 
-        def Ak(self,k,m,P):
+        def Ak(self, k, m, P):
             self.last_k = k
-            return self.As[:,:, self.reconstruct_indices[k]]
+            return self.As[:, :, self.reconstruct_indices[k]]
 
-        def Qk(self,k):
+        def Qk(self, k):
             self.last_k = k
-            return self.Qs[:,:, self.reconstruct_indices[k]]
+            return self.Qs[:, :, self.reconstruct_indices[k]]
 
-        def dAk(self,k):
+        def dAk(self, k):
             self.last_k = k
-            return self.dAs[:,:, :, self.reconstruct_indices[k]]
+            return self.dAs[:, :, :, self.reconstruct_indices[k]]
 
-        def dQk(self,k):
+        def dQk(self, k):
             self.last_k = k
-            return self.dQs[:,:, :, self.reconstruct_indices[k]]
+            return self.dQs[:, :, :, self.reconstruct_indices[k]]
 
-
-        def Q_srk(self,k):
+        def Q_srk(self, k):
             """
             Square root of the noise matrix Q
             """
@@ -2582,83 +3048,109 @@ class ContDescrStateSpace(DescreteStateSpace):
                 if matrix_index in self.Q_svd_dict:
                     (U, S, Vh) = self.Q_svd_dict[matrix_index]
                 else:
-                    (U, S, Vh) = sp.linalg.svd( self.Qs[:,:, matrix_index],
-                                        full_matrices=False, compute_uv=True,
-                                        overwrite_a=False, check_finite=False)
-                    self.Q_svd_dict[matrix_index] = (U,S,Vh)
-                    
+                    (U, S, Vh) = sp.linalg.svd(
+                        self.Qs[:, :, matrix_index],
+                        full_matrices=False,
+                        compute_uv=True,
+                        overwrite_a=False,
+                        check_finite=False,
+                    )
+                    self.Q_svd_dict[matrix_index] = (U, S, Vh)
+
                 square_root = U * np.sqrt(S)
                 self.Q_square_root_dict[matrix_index] = square_root
 
             return square_root
-        
+
         def Q_inverse(self, k, p_largest_cond_num, p_regularization_type):
             """
             Function inverts Q matrix and regularizes the inverse.
             Regularization is useful when original matrix is badly conditioned.
             Function is currently used only in SparseGP code.
-            
+
             Inputs:
             ------------------------------
             k: int
             Iteration number.
-            
+
             p_largest_cond_num: float
             Largest condition value for the inverted matrix. If cond. number is smaller than that
             no regularization happen.
-            
+
             regularization_type: 1 or 2
             Regularization type.
-            
+
             regularization_type: int (1 or 2)
-            
+
                 type 1: 1/(S[k] + regularizer) regularizer is computed
                 type 2: S[k]/(S^2[k] + regularizer) regularizer is computed
             """
-            #import pdb; pdb.set_trace()
-            
+            # import pdb; pdb.set_trace()
+
             matrix_index = self.reconstruct_indices[k]
             if matrix_index in self.Q_inverse_dict:
                 Q_inverse_r = self.Q_inverse_dict[matrix_index]
             else:
-                
                 if matrix_index in self.Q_svd_dict:
                     (U, S, Vh) = self.Q_svd_dict[matrix_index]
                 else:
-                    (U, S, Vh) = sp.linalg.svd( self.Qs[:,:, matrix_index],
-                                        full_matrices=False, compute_uv=True,
-                                        overwrite_a=False, check_finite=False)
-                    self.Q_svd_dict[matrix_index] = (U,S,Vh)
-                
-                Q_inverse_r = psd_matrix_inverse(k, 0.5*(self.Qs[:,:, matrix_index] + self.Qs[:,:, matrix_index].T), U,S, p_largest_cond_num, p_regularization_type)
+                    (U, S, Vh) = sp.linalg.svd(
+                        self.Qs[:, :, matrix_index],
+                        full_matrices=False,
+                        compute_uv=True,
+                        overwrite_a=False,
+                        check_finite=False,
+                    )
+                    self.Q_svd_dict[matrix_index] = (U, S, Vh)
+
+                Q_inverse_r = psd_matrix_inverse(
+                    k,
+                    0.5 * (self.Qs[:, :, matrix_index] + self.Qs[:, :, matrix_index].T),
+                    U,
+                    S,
+                    p_largest_cond_num,
+                    p_regularization_type,
+                )
                 self.Q_inverse_dict[matrix_index] = Q_inverse_r
 
             return Q_inverse_r
-            
-        
+
         def return_last(self):
             """
             Function returns last available matrices.
             """
 
-            if (self.last_k is None):
+            if self.last_k is None:
                 raise ValueError("Matrices are not computed.")
             else:
                 ind = self.reconstruct_indices[self.last_k]
-                A = self.As[:,:, ind]
-                Q = self.Qs[:,:, ind]
-                dA = self.dAs[:,:, :, ind]
-                dQ = self.dQs[:,:, :, ind]
+                A = self.As[:, :, ind]
+                Q = self.Qs[:, :, ind]
+                dA = self.dAs[:, :, :, ind]
+                dQ = self.dQs[:, :, :, ind]
 
             return self.last_k, A, Q, dA, dQ
 
     @classmethod
-    def cont_discr_kalman_filter(cls, F, L, Qc, p_H, p_R, P_inf, X, Y, index = None,
-                                 m_init=None, P_init=None,
-                                 p_kalman_filter_type='regular',
-                                 calc_log_likelihood=False,
-                                 calc_grad_log_likelihood=False,
-                                 grad_params_no=0, grad_calc_params=None):
+    def cont_discr_kalman_filter(
+        cls,
+        F,
+        L,
+        Qc,
+        p_H,
+        p_R,
+        P_inf,
+        X,
+        Y,
+        index=None,
+        m_init=None,
+        P_init=None,
+        p_kalman_filter_type="regular",
+        calc_log_likelihood=False,
+        calc_grad_log_likelihood=False,
+        grad_params_no=0,
+        grad_calc_params=None,
+    ):
         """
         This function implements the continuous-discrete Kalman Filter algorithm
         These notations for the State-Space model are assumed:
@@ -2800,18 +3292,21 @@ class ContDescrStateSpace(DescreteStateSpace):
         p_H = np.atleast_1d(p_H)
         p_R = np.atleast_1d(p_R)
 
-        X.shape, old_X_shape  = cls._reshape_input_data(X.shape, 2) # represent as column
-        if (X.shape[1] != 1):
+        X.shape, old_X_shape = cls._reshape_input_data(
+            X.shape, 2
+        )  # represent as column
+        if X.shape[1] != 1:
             raise ValueError("Only one dimensional X data is supported.")
 
-        Y.shape, old_Y_shape  = cls._reshape_input_data(Y.shape) # represent as column
+        Y.shape, old_Y_shape = cls._reshape_input_data(Y.shape)  # represent as column
 
         state_dim = F.shape[0]
         measurement_dim = Y.shape[1]
-        time_series_no = Y.shape[2] # multiple time series mode
+        time_series_no = Y.shape[2]  # multiple time series mode
 
-        if  ((len(p_H.shape) == 3) and (len(p_H.shape[2]) != 1)) or\
-            ((len(p_R.shape) == 3) and (len(p_R.shape[2]) != 1)):
+        if ((len(p_H.shape) == 3) and (p_H.shape[2] != 1)) or (
+            (len(p_R.shape) == 3) and (p_R.shape[2] != 1)
+        ):  # shape[2] is an int; calling len() on it raised TypeError
             model_matrices_chage_with_time = True
         else:
             model_matrices_chage_with_time = False
@@ -2820,26 +3315,36 @@ class ContDescrStateSpace(DescreteStateSpace):
         old_index_shape = None
         if index is None:
             if (len(p_H.shape) == 3) or (len(p_R.shape) == 3):
-                raise ValueError("Parameter index can not be None for time varying matrices (third dimension is present)")
-            else: # matrices do not change in time, so form dummy zero indices.
-                index = np.zeros((1,Y.shape[0]))
+                raise ValueError(
+                    "Parameter index can not be None for time varying matrices (third dimension is present)"
+                )
+            else:  # matrices do not change in time, so form dummy zero indices.
+                index = np.zeros((1, Y.shape[0]))
         else:
             if len(index.shape) == 1:
-                index.shape = (1,index.shape[0])
+                index.shape = (1, index.shape[0])
                 old_index_shape = (index.shape[0],)
 
-            if (index.shape[1] != Y.shape[0]):
-                raise ValueError("Number of measurements must be equal the number of H_{k}, R_{k}")
+            if index.shape[1] != Y.shape[0]:
+                raise ValueError(
+                    "Number of measurements must be equal the number of H_{k}, R_{k}"
+                )
 
-        if (index.shape[0] == 1):
-            H_time_var_index = 0; R_time_var_index = 0
-        elif (index.shape[0] == 4):
-            H_time_var_index = 0; R_time_var_index = 1
+        if index.shape[0] == 1:
+            H_time_var_index = 0
+            R_time_var_index = 0
+        elif index.shape[0] in (2, 4):  # row 0 -> H, row 1 -> R; 4 rows still accepted
+            H_time_var_index = 0
+            R_time_var_index = 1
         else:
             raise ValueError("First Dimension of index must be either 1 or 2.")
 
-        (p_H, old_H_shape) = cls._check_SS_matrix(p_H, state_dim, measurement_dim, which='H')
-        (p_R, old_R_shape) = cls._check_SS_matrix(p_R, state_dim, measurement_dim, which='R')
+        (p_H, old_H_shape) = cls._check_SS_matrix(
+            p_H, state_dim, measurement_dim, which="H"
+        )
+        (p_R, old_R_shape) = cls._check_SS_matrix(
+            p_R, state_dim, measurement_dim, which="R"
+        )
 
         if m_init is None:
             m_init = np.zeros((state_dim, time_series_no))
@@ -2849,7 +3354,7 @@ class ContDescrStateSpace(DescreteStateSpace):
         if P_init is None:
             P_init = P_inf.copy()
 
-        if p_kalman_filter_type not in ('regular', 'svd'):
+        if p_kalman_filter_type not in ("regular", "svd"):
             raise ValueError("Kalman filer type neither 'regular nor 'svd'.")
 
         # Functions to pass to the kalman_filter algorithm:
@@ -2858,26 +3363,49 @@ class ContDescrStateSpace(DescreteStateSpace):
         # m - vector for calculating matrices. Required for EKF. Not used here.
         # f_hl = lambda k,m,H: np.dot(H, m)
         # f_H = lambda k,m,P: p_H[:,:, index[H_time_var_index, k]]
-        #f_R = lambda k: p_R[:,:, index[R_time_var_index, k]]
-        #o_R = R_handling( p_R, index, R_time_var_index, 20)
+        # f_R = lambda k: p_R[:,:, index[R_time_var_index, k]]
+        # o_R = R_handling( p_R, index, R_time_var_index, 20)
 
         if calc_grad_log_likelihood:
+            dF = cls._check_grad_state_matrices(
+                grad_calc_params.get("dF"), state_dim, grad_params_no, which="dA"
+            )
+            dQc = cls._check_grad_state_matrices(
+                grad_calc_params.get("dQc"), state_dim, grad_params_no, which="dQ"
+            )
+            dP_inf = cls._check_grad_state_matrices(
+                grad_calc_params.get("dP_inf"), state_dim, grad_params_no, which="dA"
+            )
 
-            dF = cls._check_grad_state_matrices(grad_calc_params.get('dF'), state_dim, grad_params_no, which = 'dA')
-            dQc = cls._check_grad_state_matrices(grad_calc_params.get('dQc'), state_dim, grad_params_no, which = 'dQ')
-            dP_inf = cls._check_grad_state_matrices(grad_calc_params.get('dP_inf'), state_dim, grad_params_no, which = 'dA')
+            dH = cls._check_grad_measurement_matrices(
+                grad_calc_params.get("dH"),
+                state_dim,
+                grad_params_no,
+                measurement_dim,
+                which="dH",
+            )
+            dR = cls._check_grad_measurement_matrices(
+                grad_calc_params.get("dR"),
+                state_dim,
+                grad_params_no,
+                measurement_dim,
+                which="dR",
+            )
 
-            dH = cls._check_grad_measurement_matrices(grad_calc_params.get('dH'), state_dim, grad_params_no, measurement_dim, which = 'dH')
-            dR = cls._check_grad_measurement_matrices(grad_calc_params.get('dR'), state_dim, grad_params_no, measurement_dim, which = 'dR')
-
-            dm_init = grad_calc_params.get('dm_init') # Initial values for the Kalman Filter
+            dm_init = grad_calc_params.get(
+                "dm_init"
+            )  # Initial values for the Kalman Filter
             if dm_init is None:
                 # multiple time series mode. Keep grad_params always as a last dimension
-                dm_init = np.zeros( (state_dim, time_series_no, grad_params_no) )
+                dm_init = np.zeros((state_dim, time_series_no, grad_params_no))
 
-            dP_init = grad_calc_params.get('dP_init') # Initial values for the Kalman Filter
+            dP_init = grad_calc_params.get(
+                "dP_init"
+            )  # Initial values for the Kalman Filter
             if dP_init is None:
-                dP_init = dP_inf(0).copy() # get the dP_init matrix, because now it is a function
+                dP_init = dP_inf(
+                    0
+                ).copy()  # get the dP_init matrix, because now it is a function
 
         else:
             dP_inf = None
@@ -2888,23 +3416,48 @@ class ContDescrStateSpace(DescreteStateSpace):
             dm_init = None
             dP_init = None
 
-        measurement_callables = Std_Measurement_Callables_Class(p_H, H_time_var_index, p_R, index, R_time_var_index, 20, dH, dR)
-        #import pdb; pdb.set_trace()
+        measurement_callables = Std_Measurement_Callables_Class(
+            p_H, H_time_var_index, p_R, index, R_time_var_index, 20, dH, dR
+        )
+        # import pdb; pdb.set_trace()
 
-        dynamic_callables = cls._cont_to_discrete_object(X, F, L, Qc, compute_derivatives=calc_grad_log_likelihood,
-                                              grad_params_no=grad_params_no,
-                                              P_inf=P_inf, dP_inf=dP_inf, dF = dF, dQc=dQc)
+        dynamic_callables = cls._cont_to_discrete_object(
+            X,
+            F,
+            L,
+            Qc,
+            compute_derivatives=calc_grad_log_likelihood,
+            grad_params_no=grad_params_no,
+            P_inf=P_inf,
+            dP_inf=dP_inf,
+            dF=dF,
+            dQc=dQc,
+        )
 
         if print_verbose:
             print("General: run Continuos-Discrete Kalman Filter")
         # Also for dH, dR and probably for all derivatives
-        (M, P, log_likelihood, grad_log_likelihood, AQcomp) = cls._cont_discr_kalman_filter_raw(state_dim,
-                        dynamic_callables, measurement_callables,
-                        X, Y, m_init=m_init, P_init=P_init,
-                        p_kalman_filter_type=p_kalman_filter_type,
-                        calc_log_likelihood=calc_log_likelihood,
-                        calc_grad_log_likelihood=calc_grad_log_likelihood, grad_params_no=grad_params_no,
-                        dm_init=dm_init, dP_init=dP_init)
+        (
+            M,
+            P,
+            log_likelihood,
+            grad_log_likelihood,
+            AQcomp,
+        ) = cls._cont_discr_kalman_filter_raw(
+            state_dim,
+            dynamic_callables,
+            measurement_callables,
+            X,
+            Y,
+            m_init=m_init,
+            P_init=P_init,
+            p_kalman_filter_type=p_kalman_filter_type,
+            calc_log_likelihood=calc_log_likelihood,
+            calc_grad_log_likelihood=calc_grad_log_likelihood,
+            grad_params_no=grad_params_no,
+            dm_init=dm_init,
+            dP_init=dP_init,
+        )
 
         if old_index_shape is not None:
             index.shape = old_index_shape
@@ -2924,12 +3477,22 @@ class ContDescrStateSpace(DescreteStateSpace):
         return (M, P, log_likelihood, grad_log_likelihood, AQcomp)
 
     @classmethod
-    def _cont_discr_kalman_filter_raw(cls,state_dim, p_dynamic_callables, p_measurement_callables, X, Y,
-                                      m_init, P_init,
-                                      p_kalman_filter_type='regular',
-                                      calc_log_likelihood=False,
-                      calc_grad_log_likelihood=False, grad_params_no=None,
-                      dm_init=None, dP_init=None):
+    def _cont_discr_kalman_filter_raw(
+        cls,
+        state_dim,
+        p_dynamic_callables,
+        p_measurement_callables,
+        X,
+        Y,
+        m_init,
+        P_init,
+        p_kalman_filter_type="regular",
+        calc_log_likelihood=False,
+        calc_grad_log_likelihood=False,
+        grad_params_no=None,
+        dm_init=None,
+        dP_init=None,
+    ):
         """
         General filtering algorithm for inference in the continuos-discrete
         state-space model:
@@ -3015,89 +3578,134 @@ class ContDescrStateSpace(DescreteStateSpace):
 
         """
 
-        #import pdb; pdb.set_trace()
-        steps_no = Y.shape[0] # number of steps in the Kalman Filter
-        time_series_no = Y.shape[2] # multiple time series mode
+        # import pdb; pdb.set_trace()
+        steps_no = Y.shape[0]  # number of steps in the Kalman Filter
+        time_series_no = Y.shape[2]  # multiple time series mode
 
         # Allocate space for results
         # Mean estimations. Initial values will be included
-        M = np.empty(((steps_no+1),state_dim,time_series_no))
-        M[0,:,:] = m_init # Initialize mean values
+        M = np.empty(((steps_no + 1), state_dim, time_series_no))
+        M[0, :, :] = m_init  # Initialize mean values
         # Variance estimations. Initial values will be included
-        P = np.empty(((steps_no+1),state_dim,state_dim))
-        P_init = 0.5*( P_init + P_init.T) # symmetrize initial covariance. In some ustable cases this is uiseful
-        P[0,:,:] = P_init # Initialize initial covariance matrix
+        P = np.empty(((steps_no + 1), state_dim, state_dim))
+        P_init = 0.5 * (
+            P_init + P_init.T
+        )  # symmetrize initial covariance. In some unstable cases this is useful
+        P[0, :, :] = P_init  # Initialize initial covariance matrix
 
-        #import pdb;pdb.set_trace()
-        if p_kalman_filter_type == 'svd':
-            (U,S,Vh) = sp.linalg.svd( P_init,full_matrices=False, compute_uv=True,
-                      overwrite_a=False,check_finite=True)
-            S[ (S==0) ] = 1e-17 # allows to run algorithm for singular initial variance
-            P_upd = (P_init, S,U)
-        #log_likelihood = 0
-        #grad_log_likelihood = np.zeros((grad_params_no,1))
+        # import pdb;pdb.set_trace()
+        if p_kalman_filter_type == "svd":
+            (U, S, Vh) = sp.linalg.svd(
+                P_init,
+                full_matrices=False,
+                compute_uv=True,
+                overwrite_a=False,
+                check_finite=True,
+            )
+            S[(S == 0)] = 1e-17  # allows to run algorithm for singular initial variance
+            P_upd = (P_init, S, U)
+        # log_likelihood = 0
+        # grad_log_likelihood = np.zeros((grad_params_no,1))
         log_likelihood = 0 if calc_log_likelihood else None
         grad_log_likelihood = 0 if calc_grad_log_likelihood else None
 
-        #setting initial values for derivatives update
+        # setting initial values for derivatives update
         dm_upd = dm_init
         dP_upd = dP_init
         # Main loop of the Kalman filter
-        for k in range(0,steps_no):
+        for k in range(0, steps_no):
             # In this loop index for new estimations is (k+1), old - (k)
             # This happened because initial values are stored at 0-th index.
-            #import pdb; pdb.set_trace()
+            # import pdb; pdb.set_trace()
 
-            prev_mean = M[k,:,:] # mean from the previous step
+            prev_mean = M[k, :, :]  # mean from the previous step
 
-            if p_kalman_filter_type == 'svd':
-                m_pred, P_pred, dm_pred, dP_pred = \
-                cls._kalman_prediction_step_SVD(k, prev_mean ,P_upd, p_dynamic_callables,
+            if p_kalman_filter_type == "svd":
+                m_pred, P_pred, dm_pred, dP_pred = cls._kalman_prediction_step_SVD(
+                    k,
+                    prev_mean,
+                    P_upd,
+                    p_dynamic_callables,
                     calc_grad_log_likelihood=calc_grad_log_likelihood,
-                    p_dm = dm_upd, p_dP = dP_upd)
+                    p_dm=dm_upd,
+                    p_dP=dP_upd,
+                )
             else:
-                m_pred, P_pred, dm_pred, dP_pred = \
-                cls._kalman_prediction_step(k, prev_mean ,P[k,:,:], p_dynamic_callables,
+                m_pred, P_pred, dm_pred, dP_pred = cls._kalman_prediction_step(
+                    k,
+                    prev_mean,
+                    P[k, :, :],
+                    p_dynamic_callables,
                     calc_grad_log_likelihood=calc_grad_log_likelihood,
-                    p_dm = dm_upd, p_dP = dP_upd )
+                    p_dm=dm_upd,
+                    p_dP=dP_upd,
+                )
 
-            #import pdb; pdb.set_trace()
-            k_measurment = Y[k,:,:]
+            # import pdb; pdb.set_trace()
+            k_measurment = Y[k, :, :]
 
-            if (np.any(np.isnan(k_measurment)) == False):
+            if np.any(np.isnan(k_measurment)) == False:
+                if p_kalman_filter_type == "svd":
+                    (
+                        m_upd,
+                        P_upd,
+                        log_likelihood_update,
+                        dm_upd,
+                        dP_upd,
+                        d_log_likelihood_update,
+                    ) = cls._kalman_update_step_SVD(
+                        k,
+                        m_pred,
+                        P_pred,
+                        p_measurement_callables,
+                        k_measurment,
+                        calc_log_likelihood=calc_log_likelihood,
+                        calc_grad_log_likelihood=calc_grad_log_likelihood,
+                        p_dm=dm_pred,
+                        p_dP=dP_pred,
+                    )
 
-                if p_kalman_filter_type == 'svd':
-                    m_upd, P_upd, log_likelihood_update, dm_upd, dP_upd, d_log_likelihood_update = \
-                    cls._kalman_update_step_SVD(k,  m_pred , P_pred, p_measurement_callables,
-                            k_measurment, calc_log_likelihood=calc_log_likelihood,
-                            calc_grad_log_likelihood=calc_grad_log_likelihood,
-                            p_dm = dm_pred, p_dP = dP_pred )
-
-
-    #                m_upd, P_upd, log_likelihood_update, dm_upd, dP_upd, d_log_likelihood_update = \
-    #                cls._kalman_update_step(k,  m_pred , P_pred[0], f_h, f_H, p_R.f_R, k_measurment,
-    #                        calc_log_likelihood=calc_log_likelihood,
-    #                        calc_grad_log_likelihood=calc_grad_log_likelihood,
-    #                        p_dm = dm_pred, p_dP = dP_pred, grad_calc_params_2 = (dH, dR))
-    #
-    #                (U,S,Vh) = sp.linalg.svd( P_upd,full_matrices=False, compute_uv=True,
-    #                      overwrite_a=False,check_finite=True)
-    #                P_upd = (P_upd, S,U)
+                #                m_upd, P_upd, log_likelihood_update, dm_upd, dP_upd, d_log_likelihood_update = \
+                #                cls._kalman_update_step(k,  m_pred , P_pred[0], f_h, f_H, p_R.f_R, k_measurment,
+                #                        calc_log_likelihood=calc_log_likelihood,
+                #                        calc_grad_log_likelihood=calc_grad_log_likelihood,
+                #                        p_dm = dm_pred, p_dP = dP_pred, grad_calc_params_2 = (dH, dR))
+                #
+                #                (U,S,Vh) = sp.linalg.svd( P_upd,full_matrices=False, compute_uv=True,
+                #                      overwrite_a=False,check_finite=True)
+                #                P_upd = (P_upd, S,U)
                 else:
-                    m_upd, P_upd, log_likelihood_update, dm_upd, dP_upd, d_log_likelihood_update = \
-                    cls._kalman_update_step(k,  m_pred , P_pred, p_measurement_callables, k_measurment,
-                            calc_log_likelihood=calc_log_likelihood,
-                            calc_grad_log_likelihood=calc_grad_log_likelihood,
-                            p_dm = dm_pred, p_dP = dP_pred )
+                    (
+                        m_upd,
+                        P_upd,
+                        log_likelihood_update,
+                        dm_upd,
+                        dP_upd,
+                        d_log_likelihood_update,
+                    ) = cls._kalman_update_step(
+                        k,
+                        m_pred,
+                        P_pred,
+                        p_measurement_callables,
+                        k_measurment,
+                        calc_log_likelihood=calc_log_likelihood,
+                        calc_grad_log_likelihood=calc_grad_log_likelihood,
+                        p_dm=dm_pred,
+                        p_dP=dP_pred,
+                    )
             else:
-                if k_measurment.shape != (1,1):
-                    raise ValueError("Nan measurements are currently not supported for \
-                                     multidimensional output and multiple tiem series.")
+                if k_measurment.shape != (1, 1):
+                    raise ValueError(
+                        "Nan measurements are currently not supported for \
+                                     multidimensional output and multiple tiem series."
+                    )
                 else:
-                    m_upd = m_pred; P_upd = P_pred; dm_upd = dm_pred; dP_upd = dP_pred
-                    log_likelihood_update = 0.0;
-                    d_log_likelihood_update = 0.0;
-
+                    m_upd = m_pred
+                    P_upd = P_pred
+                    dm_upd = dm_pred
+                    dP_upd = dP_pred
+                    log_likelihood_update = 0.0
+                    d_log_likelihood_update = 0.0
 
             if calc_log_likelihood:
                 log_likelihood += log_likelihood_update
@@ -3105,20 +3713,35 @@ class ContDescrStateSpace(DescreteStateSpace):
             if calc_grad_log_likelihood:
                 grad_log_likelihood += d_log_likelihood_update
 
-            M[k+1,:,:] = m_upd # separate mean value for each time series
+            M[k + 1, :, :] = m_upd  # separate mean value for each time series
 
-            if p_kalman_filter_type == 'svd':
-                P[k+1,:,:] = P_upd[0]
+            if p_kalman_filter_type == "svd":
+                P[k + 1, :, :] = P_upd[0]
             else:
-                P[k+1,:,:] = P_upd
-            #print("kf it: %i" % k)
+                P[k + 1, :, :] = P_upd
+            # print("kf it: %i" % k)
             # !!!Print statistics! Print sizes of matrices
             # !!!Print statistics! Print iteration time base on another boolean variable
-        return (M, P, log_likelihood, grad_log_likelihood, p_dynamic_callables.reset(False))
+        return (
+            M,
+            P,
+            log_likelihood,
+            grad_log_likelihood,
+            p_dynamic_callables.reset(False),
+        )
 
     @classmethod
-    def cont_discr_rts_smoother(cls,state_dim, filter_means, filter_covars,
-                                p_dynamic_callables=None, X=None, F=None,L=None,Qc=None):
+    def cont_discr_rts_smoother(
+        cls,
+        state_dim,
+        filter_means,
+        filter_covars,
+        p_dynamic_callables=None,
+        X=None,
+        F=None,
+        L=None,
+        Qc=None,
+    ):
         """
 
         Continuos-discrete Rauch–Tung–Striebel(RTS) smoother.
@@ -3158,45 +3781,78 @@ class ContDescrStateSpace(DescreteStateSpace):
             Smoothed estimates of the state covariances
         """
 
-        f_a = lambda k,m,A: np.dot(A, m) # state dynamic model
-        if p_dynamic_callables is None: # make this object from scratch
-            p_dynamic_callables = cls._cont_to_discrete_object(cls, X, F,L,Qc,f_a,compute_derivatives=False,
-                                                  grad_params_no=None, P_inf=None, dP_inf=None, dF = None, dQc=None)
+        f_a = lambda k, m, A: np.dot(A, m)  # state dynamic model
+        if p_dynamic_callables is None:  # make this object from scratch
+            p_dynamic_callables = cls._cont_to_discrete_object(
+                cls,
+                X,
+                F,
+                L,
+                Qc,
+                f_a,
+                compute_derivatives=False,
+                grad_params_no=None,
+                P_inf=None,
+                dP_inf=None,
+                dF=None,
+                dQc=None,
+            )
 
-        no_steps = filter_covars.shape[0]-1# number of steps (minus initial covariance)
+        no_steps = (
+            filter_covars.shape[0] - 1
+        )  # number of steps (minus initial covariance)
 
-        M = np.empty(filter_means.shape) # smoothed means
-        P = np.empty(filter_covars.shape) # smoothed covars
+        M = np.empty(filter_means.shape)  # smoothed means
+        P = np.empty(filter_covars.shape)  # smoothed covars
 
         if print_verbose:
             print("General: run Continuos-Discrete Kalman Smoother")
 
-        M[-1,:,:] = filter_means[-1,:,:]
-        P[-1,:,:] = filter_covars[-1,:,:]
-        for k in range(no_steps-1,-1,-1):
+        M[-1, :, :] = filter_means[-1, :, :]
+        P[-1, :, :] = filter_covars[-1, :, :]
+        for k in range(no_steps - 1, -1, -1):
+            prev_mean = filter_means[k, :]  # mean from the previous step
+            m_pred, P_pred, tmp1, tmp2 = cls._kalman_prediction_step(
+                k,
+                prev_mean,
+                filter_covars[k, :, :],
+                p_dynamic_callables,
+                calc_grad_log_likelihood=False,
+            )
+            p_m = filter_means[k, :]
+            p_m_prev_step = M[(k + 1), :]
 
-            prev_mean = filter_means[k,:] # mean from the previous step
-            m_pred, P_pred, tmp1, tmp2 = \
-                    cls._kalman_prediction_step(k, prev_mean,
-                                                filter_covars[k,:,:], p_dynamic_callables,
-                                                calc_grad_log_likelihood=False)
-            p_m = filter_means[k,:]
-            p_m_prev_step = M[(k+1),:]
+            m_upd, P_upd, tmp_G = cls._rts_smoother_update_step(
+                k,
+                p_m,
+                filter_covars[k, :, :],
+                m_pred,
+                P_pred,
+                p_m_prev_step,
+                P[(k + 1), :, :],
+                p_dynamic_callables,
+            )
 
-            m_upd, P_upd, tmp_G = cls._rts_smoother_update_step(k,
-                            p_m ,filter_covars[k,:,:],
-                            m_pred, P_pred, p_m_prev_step ,P[(k+1),:,:], p_dynamic_callables)
-
-            M[k,:,:] = m_upd
-            P[k,:,:] = P_upd
+            M[k, :, :] = m_upd
+            P[k, :, :] = P_upd
         # Return values
         return (M, P)
 
     @classmethod
-    def _cont_to_discrete_object(cls, X, F, L, Qc, compute_derivatives=False,
-                                 grad_params_no=None,
-                                 P_inf=None, dP_inf=None, dF = None, dQc=None,
-                                 dt0=None):
+    def _cont_to_discrete_object(
+        cls,
+        X,
+        F,
+        L,
+        Qc,
+        compute_derivatives=False,
+        grad_params_no=None,
+        P_inf=None,
+        dP_inf=None,
+        dF=None,
+        dQc=None,
+        dt0=None,
+    ):
         """
         Function return the object which is used in Kalman filter and/or
         smoother to obtain matrices A, Q and their derivatives for discrete model
@@ -3230,53 +3886,121 @@ class ContDescrStateSpace(DescreteStateSpace):
         """
 
         unique_round_decimals = 10
-        threshold_number_of_unique_time_steps = 20 # above which matrices are separately each time
+        threshold_number_of_unique_time_steps = (
+            20  # above which matrices are computed separately each time
+        )
         dt = np.empty((X.shape[0],))
-        dt[1:] = np.diff(X[:,0],axis=0)
+        dt[1:] = np.diff(X[:, 0], axis=0)
         if dt0 is None:
-            dt[0]  = 0#dt[1]
+            dt[0] = 0  # dt[1]
         else:
-            if isinstance(dt0,str):
+            if isinstance(dt0, str):
                 dt = dt[1:]
             else:
                 dt[0] = dt0
-            
+
         unique_indices = np.unique(np.round(dt, decimals=unique_round_decimals))
         number_unique_indices = len(unique_indices)
 
-        #import pdb; pdb.set_trace()
+        # import pdb; pdb.set_trace()
         if use_cython:
-            class AQcompute_batch(state_space_cython.AQcompute_batch_Cython):
-                def __init__(self, F,L,Qc,dt,compute_derivatives=False, grad_params_no=None, P_inf=None, dP_inf=None, dF = None, dQc=None):
-                    As, Qs, reconstruct_indices, dAs, dQs = ContDescrStateSpace.lti_sde_to_descrete(F,
-                                L,Qc,dt,compute_derivatives,
-                                grad_params_no=grad_params_no, P_inf=P_inf, dP_inf=dP_inf, dF=dF, dQc=dQc)
 
-                    super(AQcompute_batch,self).__init__(As, Qs, reconstruct_indices, dAs,dQs)
+            class AQcompute_batch(state_space_cython.AQcompute_batch_Cython):
+                def __init__(
+                    self,
+                    F,
+                    L,
+                    Qc,
+                    dt,
+                    compute_derivatives=False,
+                    grad_params_no=None,
+                    P_inf=None,
+                    dP_inf=None,
+                    dF=None,
+                    dQc=None,
+                ):
+                    (
+                        As,
+                        Qs,
+                        reconstruct_indices,
+                        dAs,
+                        dQs,
+                    ) = ContDescrStateSpace.lti_sde_to_descrete(
+                        F,
+                        L,
+                        Qc,
+                        dt,
+                        compute_derivatives,
+                        grad_params_no=grad_params_no,
+                        P_inf=P_inf,
+                        dP_inf=dP_inf,
+                        dF=dF,
+                        dQc=dQc,
+                    )
+
+                    super(AQcompute_batch, self).__init__(
+                        As, Qs, reconstruct_indices, dAs, dQs
+                    )
+
         else:
             AQcompute_batch = cls.AQcompute_batch_Python
 
         if number_unique_indices > threshold_number_of_unique_time_steps:
-            AQcomp = cls.AQcompute_once(F,L,Qc, dt,compute_derivatives=compute_derivatives,
-                                    grad_params_no=grad_params_no, P_inf=P_inf, dP_inf=dP_inf, dF=dF, dQc=dQc)
+            AQcomp = cls.AQcompute_once(
+                F,
+                L,
+                Qc,
+                dt,
+                compute_derivatives=compute_derivatives,
+                grad_params_no=grad_params_no,
+                P_inf=P_inf,
+                dP_inf=dP_inf,
+                dF=dF,
+                dQc=dQc,
+            )
             if print_verbose:
                 print("CDO:  Continue-to-discrete INSTANTANEOUS object is created.")
-                print("CDO:  Number of different time steps: %i" % (number_unique_indices,) )
+                print(
+                    "CDO:  Number of different time steps: %i"
+                    % (number_unique_indices,)
+                )
 
         else:
-            AQcomp = AQcompute_batch(F,L,Qc,dt,compute_derivatives=compute_derivatives,
-                                    grad_params_no=grad_params_no, P_inf=P_inf, dP_inf=dP_inf, dF=dF, dQc=dQc)
+            AQcomp = AQcompute_batch(
+                F,
+                L,
+                Qc,
+                dt,
+                compute_derivatives=compute_derivatives,
+                grad_params_no=grad_params_no,
+                P_inf=P_inf,
+                dP_inf=dP_inf,
+                dF=dF,
+                dQc=dQc,
+            )
             if print_verbose:
                 print("CDO:  Continue-to-discrete BATCH object is created.")
-                print("CDO:  Number of different time steps: %i" % (number_unique_indices,) )
-                print("CDO:  Total size if its data: %i" % (AQcomp.total_size_of_data,) )
+                print(
+                    "CDO:  Number of different time steps: %i"
+                    % (number_unique_indices,)
+                )
+                print("CDO:  Total size if its data: %i" % (AQcomp.total_size_of_data,))
 
         return AQcomp
 
     @staticmethod
-    def lti_sde_to_descrete(F,L,Qc,dt,compute_derivatives=False,
-                            grad_params_no=None, P_inf=None,
-                            dP_inf=None, dF = None, dQc=None):
+    def lti_sde_to_descrete(
+        F,
+        L,
+        Qc,
+        dt,
+        compute_derivatives=False,
+        grad_params_no=None,
+        P_inf=None,
+        dP_inf=None,
+        dF=None,
+        dQc=None,
+    ):
         """
         Linear Time-Invariant Stochastic Differential Equation (LTI SDE):
 
@@ -3294,7 +4018,7 @@ class ContDescrStateSpace(DescreteStateSpace):
         TODO: this function can be redone to "preprocess dataset", when
         close time points are handeled properly (with rounding parameter) and
         values are averaged accordingly.
-        
+
         Input:
         --------------
         F,L: LTI SDE matrices of corresponding dimensions
@@ -3354,106 +4078,123 @@ class ContDescrStateSpace(DescreteStateSpace):
         # Dimensionality
         n = F.shape[0]
 
-        if not isinstance(dt, collections.Iterable): # not iterable, scalar
-            #import pdb; pdb.set_trace()
+        if not isinstance(dt, collections.Iterable):  # not iterable, scalar
+            # import pdb; pdb.set_trace()
             # The dynamical model
-            A  = matrix_exponent(F*dt)
+            A = matrix_exponent(F * dt)
 
             # The covariance matrix Q by matrix fraction decomposition ->
-            Phi = np.zeros((2*n,2*n))
-            Phi[:n,:n] = F
-            Phi[:n,n:] = L.dot(Qc).dot(L.T)
-            Phi[n:,n:] = -F.T
-            AB = matrix_exponent(Phi*dt)
-            AB = np.dot(AB, np.vstack((np.zeros((n,n)),np.eye(n))))
+            Phi = np.zeros((2 * n, 2 * n))
+            Phi[:n, :n] = F
+            Phi[:n, n:] = L.dot(Qc).dot(L.T)
+            Phi[n:, n:] = -F.T
+            AB = matrix_exponent(Phi * dt)
+            AB = np.dot(AB, np.vstack((np.zeros((n, n)), np.eye(n))))
 
-            Q_noise_1 = linalg.solve(AB[n:,:].T,AB[:n,:].T)
-            Q_noise_2  = P_inf - A.dot(P_inf).dot(A.T)
+            Q_noise_1 = linalg.solve(AB[n:, :].T, AB[:n, :].T)
+            Q_noise_2 = P_inf - A.dot(P_inf).dot(A.T)
             # The covariance matrix Q by matrix fraction decomposition <-
 
             if compute_derivatives:
                 dA = np.zeros([n, n, grad_params_no])
                 dQ = np.zeros([n, n, grad_params_no])
 
-                #AA  = np.zeros([2*n, 2*n, nparam])
-                FF  = np.zeros([2*n, 2*n])
-                AA = np.zeros([2*n, 2*n, grad_params_no])
+                # AA  = np.zeros([2*n, 2*n, nparam])
+                FF = np.zeros([2 * n, 2 * n])
+                AA = np.zeros([2 * n, 2 * n, grad_params_no])
 
                 for p in range(0, grad_params_no):
-
-                    FF[:n,:n] = F
-                    FF[n:,:n] = dF[:,:,p]
-                    FF[n:,n:] = F
+                    FF[:n, :n] = F
+                    FF[n:, :n] = dF[:, :, p]
+                    FF[n:, n:] = F
 
                     # Solve the matrix exponential
-                    AA[:,:,p] = matrix_exponent(FF*dt)
+                    AA[:, :, p] = matrix_exponent(FF * dt)
 
                     # Solve the differential equation
-                    #foo         = AA[:,:,p].dot(np.vstack([m, dm[:,p]]))
-                    #mm          = foo[:n,:]
-                    #dm[:,p] = foo[n:,:]
+                    # foo         = AA[:,:,p].dot(np.vstack([m, dm[:,p]]))
+                    # mm          = foo[:n,:]
+                    # dm[:,p] = foo[n:,:]
 
                     # The discrete-time dynamical model*
-                    if p==0:
-                        A  = AA[:n,:n,p]
-                        Q_noise_3  = P_inf - A.dot(P_inf).dot(A.T)
+                    if p == 0:
+                        A = AA[:n, :n, p]
+                        Q_noise_3 = P_inf - A.dot(P_inf).dot(A.T)
                         Q_noise = Q_noise_3
-                        #PP = A.dot(P).dot(A.T) + Q_noise_2
+                        # PP = A.dot(P).dot(A.T) + Q_noise_2
 
                     # The derivatives of A and Q
-                    dA[:,:,p] = AA[n:,:n,p]
-                    tmp = dA[:,:,p].dot(P_inf).dot(A.T)
-                    dQ[:,:,p] = dP_inf[:,:,p] - tmp \
-                       - A.dot(dP_inf[:,:,p]).dot(A.T) - tmp.T
-                    
-                    dQ[:,:,p] = 0.5*(dQ[:,:,p] + dQ[:,:,p].T) # Symmetrize
+                    dA[:, :, p] = AA[n:, :n, p]
+                    tmp = dA[:, :, p].dot(P_inf).dot(A.T)
+                    dQ[:, :, p] = (
+                        dP_inf[:, :, p] - tmp - A.dot(dP_inf[:, :, p]).dot(A.T) - tmp.T
+                    )
+
+                    dQ[:, :, p] = 0.5 * (dQ[:, :, p] + dQ[:, :, p].T)  # Symmetrize
             else:
-              dA = None
-              dQ = None
-              Q_noise = Q_noise_2
-	      # Innacuracies have been observed when Q_noise_1 was used.
-	
-            #Q_noise = Q_noise_1
+                dA = None
+                dQ = None
+                Q_noise = Q_noise_2
+            # Inaccuracies have been observed when Q_noise_1 was used.
 
-            Q_noise = 0.5*(Q_noise + Q_noise.T) # Symmetrize
-            return A, Q_noise,None, dA, dQ
+            # Q_noise = Q_noise_1
 
-        else: # iterable, array
+            Q_noise = 0.5 * (Q_noise + Q_noise.T)  # Symmetrize
+            return A, Q_noise, None, dA, dQ
 
+        else:  # iterable, array
             # Time discretizations (round to 14 decimals to avoid problems)
-            dt_unique, tmp, reconstruct_index = np.unique(np.round(dt,8),
-                                        return_index=True,return_inverse=True)
+            dt_unique, tmp, reconstruct_index = np.unique(
+                np.round(dt, 8), return_index=True, return_inverse=True
+            )
             del tmp
             # Allocate space for A and Q
-            A = np.empty((n,n,dt_unique.shape[0]))
-            Q_noise = np.empty((n,n,dt_unique.shape[0]))
+            A = np.empty((n, n, dt_unique.shape[0]))
+            Q_noise = np.empty((n, n, dt_unique.shape[0]))
 
             if compute_derivatives:
-                dA = np.empty((n,n,grad_params_no,dt_unique.shape[0]))
-                dQ = np.empty((n,n,grad_params_no,dt_unique.shape[0]))
+                dA = np.empty((n, n, grad_params_no, dt_unique.shape[0]))
+                dQ = np.empty((n, n, grad_params_no, dt_unique.shape[0]))
             else:
                 dA = None
                 dQ = None
             # Call this function for each unique dt
-            for j in range(0,dt_unique.shape[0]):
-                A[:,:,j], Q_noise[:,:,j], tmp1, dA_t, dQ_t = ContDescrStateSpace.lti_sde_to_descrete(F,L,Qc,dt_unique[j],
-                    compute_derivatives=compute_derivatives, grad_params_no=grad_params_no, P_inf=P_inf, dP_inf=dP_inf, dF = dF, dQc=dQc)
+            for j in range(0, dt_unique.shape[0]):
+                (
+                    A[:, :, j],
+                    Q_noise[:, :, j],
+                    tmp1,
+                    dA_t,
+                    dQ_t,
+                ) = ContDescrStateSpace.lti_sde_to_descrete(
+                    F,
+                    L,
+                    Qc,
+                    dt_unique[j],
+                    compute_derivatives=compute_derivatives,
+                    grad_params_no=grad_params_no,
+                    P_inf=P_inf,
+                    dP_inf=dP_inf,
+                    dF=dF,
+                    dQc=dQc,
+                )
                 if compute_derivatives:
-                    dA[:,:,:,j] = dA_t
-                    dQ[:,:,:,j] = dQ_t
+                    dA[:, :, :, j] = dA_t
+                    dQ[:, :, :, j] = dQ_t
 
             # Return
             return A, Q_noise, reconstruct_index, dA, dQ
 
+
 def matrix_exponent(M):
     """
     The function computes matrix exponent and handles some special cases
     """
 
-    if (M.shape[0] == 1): # 1*1 matrix
-        Mexp = np.array( ((np.exp(M[0,0]) ,),) )
+    if M.shape[0] == 1:  # 1*1 matrix
+        Mexp = np.array(((np.exp(M[0, 0]),),))
 
-    else: # matrix is larger
+    else:  # matrix is larger
         method = None
         try:
             Mexp = linalg.expm(M)
@@ -3473,6 +4214,7 @@ def matrix_exponent(M):
 
     return Mexp
 
+
 def balance_matrix(A):
     """
     Balance matrix, i.e. finds such similarity transformation of the original
@@ -3503,16 +4245,19 @@ def balance_matrix(A):
     """
 
     if len(A.shape) != 2 or (A.shape[0] != A.shape[1]):
-        raise ValueError('balance_matrix: Expecting square matrix')
+        raise ValueError("balance_matrix: Expecting square matrix")
 
-    N = A.shape[0] # matrix size
+    N = A.shape[0]  # matrix size
 
-    gebal = sp.linalg.lapack.get_lapack_funcs('gebal',(A,))
-    bA, lo, hi, pivscale, info = gebal(A, permute=True, scale=True,overwrite_a=False)
+    gebal = sp.linalg.lapack.get_lapack_funcs("gebal", (A,))
+    bA, lo, hi, pivscale, info = gebal(A, permute=True, scale=True, overwrite_a=False)
     if info < 0:
-        raise ValueError('balance_matrix: Illegal value in %d-th argument of internal gebal ' % -info)
+        raise ValueError(
+            "balance_matrix: Illegal value in %d-th argument of internal gebal " % -info
+        )
+
     # calculating the similarity transforamtion:
-    def perm_matr(D, c1,c2):
+    def perm_matr(D, c1, c2):
         """
         Function creates the permutation matrix which swaps columns c1 and c2.
 
@@ -3525,33 +4270,39 @@ def balance_matrix(A):
         c2: int
             Column 2. Numeration starts from 1...D
         """
-        i1 = c1-1; i2 = c2-1 # indices
-        P = np.eye(D);
-        P[i1,i1] = 0.0; P[i2,i2] = 0.0; # nullify diagonal elements
-        P[i1,i2] = 1.0; P[i2,i1] = 1.0
+        i1 = c1 - 1
+        i2 = c2 - 1  # indices
+        P = np.eye(D)
+        P[i1, i1] = 0.0
+        P[i2, i2] = 0.0
+        # nullify diagonal elements
+        P[i1, i2] = 1.0
+        P[i2, i1] = 1.0
 
         return P
 
-    P = np.eye(N) # permutation matrix
-    if (hi != N-1): # there are row permutations
-        for k in range(N-1,hi,-1):
-            new_perm = perm_matr(N, k+1, pivscale[k])
-            P = np.dot(P,new_perm)
-    if (lo != 0):
-        for k in range(0,lo,1):
-            new_perm = perm_matr(N, k+1, pivscale[k])
-            P = np.dot(P,new_perm)
+    P = np.eye(N)  # permutation matrix
+    if hi != N - 1:  # there are row permutations
+        for k in range(N - 1, hi, -1):
+            new_perm = perm_matr(N, k + 1, pivscale[k])
+            P = np.dot(P, new_perm)
+    if lo != 0:
+        for k in range(0, lo, 1):
+            new_perm = perm_matr(N, k + 1, pivscale[k])
+            P = np.dot(P, new_perm)
     D = pivscale.copy()
-    D[0:lo] = 1.0; D[hi+1:N] = 1.0 # thesee scaling factors must be set to one.
-    #D = np.diag(D) # make a diagonal matrix
+    D[0:lo] = 1.0
+    D[hi + 1 : N] = 1.0  # these scaling factors must be set to one.
+    # D = np.diag(D) # make a diagonal matrix
 
-    T = np.dot(P,np.diag(D)) # similarity transformation in question
-    T_inv = np.dot(np.diag(D**(-1)),P.T)
+    T = np.dot(P, np.diag(D))  # similarity transformation in question
+    T_inv = np.dot(np.diag(D ** (-1)), P.T)
 
-    #print( np.max(A - np.dot(T, np.dot(bA, T_inv) )) )
+    # print( np.max(A - np.dot(T, np.dot(bA, T_inv) )) )
     return bA.copy(), T, T_inv
 
-def balance_ss_model(F,L,Qc,H,Pinf,P0,dF=None,dQc=None,dPinf=None,dP0=None):
+
+def balance_ss_model(F, L, Qc, H, Pinf, P0, dF=None, dQc=None, dPinf=None, dP0=None):
     """
     Balances State-Space model for more numerical stability
 
@@ -3566,28 +4317,28 @@ def balance_ss_model(F,L,Qc,H,Pinf,P0,dF=None,dQc=None,dPinf=None,dP0=None):
          y = H T z
     """
 
-    bF,T,T_inv = balance_matrix(F)
+    bF, T, T_inv = balance_matrix(F)
 
-    bL = np.dot( T_inv, L)
-    bQc = Qc # not affected
+    bL = np.dot(T_inv, L)
+    bQc = Qc  # not affected
 
     bH = np.dot(H, T)
 
     bPinf = np.dot(T_inv, np.dot(Pinf, T_inv.T))
 
-    #import pdb; pdb.set_trace()
-#    LL,islower = linalg.cho_factor(Pinf)
-#    inds = np.triu_indices(Pinf.shape[0],k=1)
-#    LL[inds] = 0.0
-#    bLL = np.dot(T_inv, LL)
-#    bPinf = np.dot( bLL, bLL.T)
+    # import pdb; pdb.set_trace()
+    #    LL,islower = linalg.cho_factor(Pinf)
+    #    inds = np.triu_indices(Pinf.shape[0],k=1)
+    #    LL[inds] = 0.0
+    #    bLL = np.dot(T_inv, LL)
+    #    bPinf = np.dot( bLL, bLL.T)
 
     bP0 = np.dot(T_inv, np.dot(P0, T_inv.T))
 
     if dF is not None:
         bdF = np.zeros(dF.shape)
         for i in range(dF.shape[2]):
-            bdF[:,:,i] = np.dot( T_inv, np.dot( dF[:,:,i], T))
+            bdF[:, :, i] = np.dot(T_inv, np.dot(dF[:, :, i], T))
 
     else:
         bdF = None
@@ -3595,14 +4346,13 @@ def balance_ss_model(F,L,Qc,H,Pinf,P0,dF=None,dQc=None,dPinf=None,dP0=None):
     if dPinf is not None:
         bdPinf = np.zeros(dPinf.shape)
         for i in range(dPinf.shape[2]):
-            bdPinf[:,:,i] = np.dot( T_inv, np.dot( dPinf[:,:,i], T_inv.T))
-
-#            LL,islower = linalg.cho_factor(dPinf[:,:,i])
-#            inds = np.triu_indices(dPinf[:,:,i].shape[0],k=1)
-#            LL[inds] = 0.0
-#            bLL = np.dot(T_inv, LL)
-#            bdPinf[:,:,i] = np.dot( bLL, bLL.T)
+            bdPinf[:, :, i] = np.dot(T_inv, np.dot(dPinf[:, :, i], T_inv.T))
 
+    #            LL,islower = linalg.cho_factor(dPinf[:,:,i])
+    #            inds = np.triu_indices(dPinf[:,:,i].shape[0],k=1)
+    #            LL[inds] = 0.0
+    #            bLL = np.dot(T_inv, LL)
+    #            bdPinf[:,:,i] = np.dot( bLL, bLL.T)
 
     else:
         bdPinf = None
@@ -3610,12 +4360,11 @@ def balance_ss_model(F,L,Qc,H,Pinf,P0,dF=None,dQc=None,dPinf=None,dP0=None):
     if dP0 is not None:
         bdP0 = np.zeros(dP0.shape)
         for i in range(dP0.shape[2]):
-            bdP0[:,:,i] = np.dot( T_inv, np.dot( dP0[:,:,i], T_inv.T))
+            bdP0[:, :, i] = np.dot(T_inv, np.dot(dP0[:, :, i], T_inv.T))
     else:
         bdP0 = None
 
-
-    bdQc = dQc # not affected
+    bdQc = dQc  # not affected
 
     # (F,L,Qc,H,Pinf,P0,dF,dQc,dPinf,dP0)
 
diff --git a/GPy/plotting/matplot_dep/base_plots.py b/GPy/plotting/matplot_dep/base_plots.py
index e43f8efa..1eaf7d6c 100644
--- a/GPy/plotting/matplot_dep/base_plots.py
+++ b/GPy/plotting/matplot_dep/base_plots.py
@@ -5,6 +5,7 @@ import numpy as np
 
 from .util import align_subplot_array, align_subplots
 
+
 def ax_default(fignum, ax):
     if ax is None:
         fig = plt.figure(fignum)
@@ -13,11 +14,23 @@ def ax_default(fignum, ax):
         fig = ax.figure
     return fig, ax
 
-def meanplot(x, mu, color='#3300FF', ax=None, fignum=None, linewidth=2,**kw):
-    _, axes = ax_default(fignum, ax)
-    return axes.plot(x,mu,color=color,linewidth=linewidth,**kw)
 
-def gpplot(x, mu, lower, upper, edgecol='#3300FF', fillcol='#33CCFF', ax=None, fignum=None, **kwargs):
+def meanplot(x, mu, color="#3300FF", ax=None, fignum=None, linewidth=2, **kw):
+    _, axes = ax_default(fignum, ax)
+    return axes.plot(x, mu, color=color, linewidth=linewidth, **kw)
+
+
+def gpplot(
+    x,
+    mu,
+    lower,
+    upper,
+    edgecol="#3300FF",
+    fillcol="#33CCFF",
+    ax=None,
+    fignum=None,
+    **kwargs
+):
     _, axes = ax_default(fignum, ax)
 
     mu = mu.flatten()
@@ -27,51 +40,62 @@ def gpplot(x, mu, lower, upper, edgecol='#3300FF', fillcol='#33CCFF', ax=None, f
 
     plots = []
 
-    #here's the mean
+    # here's the mean
     plots.append(meanplot(x, mu, edgecol, axes))
 
-    #here's the box
-    kwargs['linewidth']=0.5
-    if not 'alpha' in kwargs.keys():
-        kwargs['alpha'] = 0.3
-    plots.append(axes.fill(np.hstack((x,x[::-1])),np.hstack((upper,lower[::-1])),color=fillcol,**kwargs))
+    # here's the box
+    kwargs["linewidth"] = 0.5
+    if not "alpha" in kwargs.keys():
+        kwargs["alpha"] = 0.3
+    plots.append(
+        axes.fill(
+            np.hstack((x, x[::-1])),
+            np.hstack((upper, lower[::-1])),
+            color=fillcol,
+            **kwargs
+        )
+    )
 
-    #this is the edge:
-    plots.append(meanplot(x, upper,color=edgecol, linewidth=0.2, ax=axes))
-    plots.append(meanplot(x, lower,color=edgecol, linewidth=0.2, ax=axes))
+    # this is the edge:
+    plots.append(meanplot(x, upper, color=edgecol, linewidth=0.2, ax=axes))
+    plots.append(meanplot(x, lower, color=edgecol, linewidth=0.2, ax=axes))
 
     return plots
 
+
 def gradient_fill(x, percentiles, ax=None, fignum=None, **kwargs):
     _, ax = ax_default(fignum, ax)
 
     plots = []
 
-    #here's the box
-    if 'linewidth' not in kwargs:
-        kwargs['linewidth'] = 0.5
-    if not 'alpha' in kwargs.keys():
-        kwargs['alpha'] = 1./(len(percentiles))
+    # here's the box
+    if "linewidth" not in kwargs:
+        kwargs["linewidth"] = 0.5
+    if not "alpha" in kwargs.keys():
+        kwargs["alpha"] = 1.0 / (len(percentiles))
 
     # pop where from kwargs
-    where = kwargs.pop('where') if 'where' in kwargs else None
+    where = kwargs.pop("where") if "where" in kwargs else None
     # pop interpolate, which we actually do not do here!
-    if 'interpolate' in kwargs: kwargs.pop('interpolate')
+    if "interpolate" in kwargs:
+        kwargs.pop("interpolate")
 
     def pairwise(inlist):
         l = len(inlist)
-        for i in range(int(np.ceil(l/2.))):
-            yield inlist[:][i], inlist[:][(l-1)-i]
+        for i in range(int(np.ceil(l / 2.0))):
+            yield inlist[:][i], inlist[:][(l - 1) - i]
 
     polycol = []
     for y1, y2 in pairwise(percentiles):
         import matplotlib.mlab as mlab
+
         # Handle united data, such as dates
         ax._process_unit_info(xdata=x, ydata=y1)
         ax._process_unit_info(ydata=y2)
 
         # Convert the arrays so we can work with them
         from numpy import ma
+
         x = ma.masked_invalid(ax.convert_xunits(x))
         y1 = ma.masked_invalid(ax.convert_yunits(y1))
         y2 = ma.masked_invalid(ax.convert_yunits(y2))
@@ -103,7 +127,7 @@ def gradient_fill(x, percentiles, ax=None, fignum=None, **kwargs):
                 continue
 
             N = len(xslice)
-            X = np.zeros((2 * N + 2, 2), np.float)
+            X = np.zeros((2 * N + 2, 2), float)
 
             # the purpose of the next two lines is for when y2 is a
             # scalar like 0 and we want the fill to go all the way
@@ -114,19 +138,21 @@ def gradient_fill(x, percentiles, ax=None, fignum=None, **kwargs):
             X[0] = start
             X[N + 1] = end
 
-            X[1:N + 1, 0] = xslice
-            X[1:N + 1, 1] = y1slice
-            X[N + 2:, 0] = xslice[::-1]
-            X[N + 2:, 1] = y2slice[::-1]
+            X[1 : N + 1, 0] = xslice
+            X[1 : N + 1, 1] = y1slice
+            X[N + 2 :, 0] = xslice[::-1]
+            X[N + 2 :, 1] = y2slice[::-1]
 
             polys.append(X)
         polycol.extend(polys)
     from matplotlib.collections import PolyCollection
+
     plots.append(PolyCollection(polycol, **kwargs))
     ax.add_collection(plots[-1], autolim=True)
     ax.autoscale_view()
     return plots
 
+
 def gperrors(x, mu, lower, upper, edgecol=None, ax=None, fignum=None, **kwargs):
     _, axes = ax_default(fignum, ax)
 
@@ -138,17 +164,19 @@ def gperrors(x, mu, lower, upper, edgecol=None, ax=None, fignum=None, **kwargs):
     plots = []
 
     if edgecol is None:
-        edgecol='#3300FF'
+        edgecol = "#3300FF"
 
-    if not 'alpha' in kwargs.keys():
-        kwargs['alpha'] = 1.
+    if not "alpha" in kwargs.keys():
+        kwargs["alpha"] = 1.0
 
+    if not "lw" in kwargs.keys():
+        kwargs["lw"] = 1.0
 
-    if not 'lw' in kwargs.keys():
-        kwargs['lw'] = 1.
-
-
-    plots.append(axes.errorbar(x,mu,yerr=np.vstack([mu-lower,upper-mu]),color=edgecol,**kwargs))
+    plots.append(
+        axes.errorbar(
+            x, mu, yerr=np.vstack([mu - lower, upper - mu]), color=edgecol, **kwargs
+        )
+    )
     plots[-1][0].remove()
     return plots
 
@@ -156,53 +184,60 @@ def gperrors(x, mu, lower, upper, edgecol=None, ax=None, fignum=None, **kwargs):
 def removeRightTicks(ax=None):
     ax = ax or plt.gca()
     for i, line in enumerate(ax.get_yticklines()):
-        if i%2 == 1:   # odd indices
+        if i % 2 == 1:  # odd indices
             line.set_visible(False)
 
+
 def removeUpperTicks(ax=None):
     ax = ax or plt.gca()
     for i, line in enumerate(ax.get_xticklines()):
-        if i%2 == 1:   # odd indices
+        if i % 2 == 1:  # odd indices
             line.set_visible(False)
 
-def fewerXticks(ax=None,divideby=2):
+
+def fewerXticks(ax=None, divideby=2):
     ax = ax or plt.gca()
     ax.set_xticks(ax.get_xticks()[::divideby])
 
-def x_frame1D(X,plot_limits=None,resolution=None):
+
+def x_frame1D(X, plot_limits=None, resolution=None):
     """
     Internal helper function for making plots, returns a set of input values to plot as well as lower and upper limits
     """
-    assert X.shape[1] ==1, "x_frame1D is defined for one-dimensional inputs"
+    assert X.shape[1] == 1, "x_frame1D is defined for one-dimensional inputs"
     if plot_limits is None:
         from ...core.parameterization.variational import VariationalPosterior
+
         if isinstance(X, VariationalPosterior):
-            xmin,xmax = X.mean.min(0),X.mean.max(0)
+            xmin, xmax = X.mean.min(0), X.mean.max(0)
         else:
-            xmin,xmax = X.min(0),X.max(0)
-        xmin, xmax = xmin-0.2*(xmax-xmin), xmax+0.2*(xmax-xmin)
-    elif len(plot_limits)==2:
+            xmin, xmax = X.min(0), X.max(0)
+        xmin, xmax = xmin - 0.2 * (xmax - xmin), xmax + 0.2 * (xmax - xmin)
+    elif len(plot_limits) == 2:
         xmin, xmax = plot_limits
     else:
         raise ValueError("Bad limits for plotting")
 
-    Xnew = np.linspace(xmin,xmax,resolution or 200)[:,None]
+    Xnew = np.linspace(xmin, xmax, resolution or 200)[:, None]
     return Xnew, xmin, xmax
 
-def x_frame2D(X,plot_limits=None,resolution=None):
+
+def x_frame2D(X, plot_limits=None, resolution=None):
     """
     Internal helper function for making plots, returns a set of input values to plot as well as lower and upper limits
     """
-    assert X.shape[1] ==2, "x_frame2D is defined for two-dimensional inputs"
+    assert X.shape[1] == 2, "x_frame2D is defined for two-dimensional inputs"
     if plot_limits is None:
-        xmin,xmax = X.min(0),X.max(0)
-        xmin, xmax = xmin-0.2*(xmax-xmin), xmax+0.2*(xmax-xmin)
-    elif len(plot_limits)==2:
+        xmin, xmax = X.min(0), X.max(0)
+        xmin, xmax = xmin - 0.2 * (xmax - xmin), xmax + 0.2 * (xmax - xmin)
+    elif len(plot_limits) == 2:
         xmin, xmax = plot_limits
     else:
         raise ValueError("Bad limits for plotting")
 
     resolution = resolution or 50
-    xx,yy = np.mgrid[xmin[0]:xmax[0]:1j*resolution,xmin[1]:xmax[1]:1j*resolution]
-    Xnew = np.vstack((xx.flatten(),yy.flatten())).T
+    xx, yy = np.mgrid[
+        xmin[0] : xmax[0] : 1j * resolution, xmin[1] : xmax[1] : 1j * resolution
+    ]
+    Xnew = np.vstack((xx.flatten(), yy.flatten())).T
     return Xnew, xx, yy, xmin, xmax
diff --git a/GPy/plotting/matplot_dep/plot_definitions.py b/GPy/plotting/matplot_dep/plot_definitions.py
index 7fadbf67..e462dea2 100644
--- a/GPy/plotting/matplot_dep/plot_definitions.py
+++ b/GPy/plotting/matplot_dep/plot_definitions.py
@@ -1,4 +1,4 @@
-#===============================================================================
+# ===============================================================================
 # Copyright (c) 2015, Max Zwiessele
 # All rights reserved.
 #
@@ -26,7 +26,7 @@
 # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-#===============================================================================
+# ===============================================================================
 import numpy as np
 from matplotlib import pyplot as plt
 from ..abstract_plotting_library import AbstractPlottingLibrary
@@ -37,6 +37,7 @@ from .controllers import ImshowController, ImAnnotateController
 import itertools
 from .util import legend_ontop
 
+
 class MatplotlibPlots(AbstractPlottingLibrary):
     def __init__(self):
         super(MatplotlibPlots, self).__init__()
@@ -49,54 +50,86 @@ class MatplotlibPlots(AbstractPlottingLibrary):
         fig.gridspec = plt.GridSpec(rows, cols, **gridspec_kwargs)
         return fig
 
-    def new_canvas(self, figure=None, row=1, col=1, projection='2d', xlabel=None, ylabel=None, zlabel=None, title=None, xlim=None, ylim=None, zlim=None, **kwargs):
-        if projection == '3d':
+    def new_canvas(
+        self,
+        figure=None,
+        row=1,
+        col=1,
+        projection="2d",
+        xlabel=None,
+        ylabel=None,
+        zlabel=None,
+        title=None,
+        xlim=None,
+        ylim=None,
+        zlim=None,
+        **kwargs
+    ):
+        if projection == "3d":
             from mpl_toolkits.mplot3d import Axes3D
-        elif projection == '2d':
+        elif projection == "2d":
             projection = None
-        if 'ax' in kwargs:
-            ax = kwargs.pop('ax')
+        if "ax" in kwargs:
+            ax = kwargs.pop("ax")
         else:
             if figure is not None:
                 fig = figure
-            elif 'num' in kwargs and 'figsize' in kwargs:
-                fig = self.figure(num=kwargs.pop('num'), figsize=kwargs.pop('figsize'))
-            elif 'num' in kwargs:
-                fig = self.figure(num=kwargs.pop('num'))
-            elif 'figsize' in kwargs:
-                fig = self.figure(figsize=kwargs.pop('figsize'))
+            elif "num" in kwargs and "figsize" in kwargs:
+                fig = self.figure(num=kwargs.pop("num"), figsize=kwargs.pop("figsize"))
+            elif "num" in kwargs:
+                fig = self.figure(num=kwargs.pop("num"))
+            elif "figsize" in kwargs:
+                fig = self.figure(figsize=kwargs.pop("figsize"))
             else:
                 fig = self.figure()
 
-            #if hasattr(fig, 'rows') and hasattr(fig, 'cols'):
-            ax = fig.add_subplot(fig.gridspec[row-1, col-1], projection=projection)
+            # if hasattr(fig, 'rows') and hasattr(fig, 'cols'):
+            ax = fig.add_subplot(fig.gridspec[row - 1, col - 1], projection=projection)
 
-        if xlim is not None: ax.set_xlim(xlim)
-        if ylim is not None: ax.set_ylim(ylim)
-        if xlabel is not None: ax.set_xlabel(xlabel)
-        if ylabel is not None: ax.set_ylabel(ylabel)
-        if title is not None: ax.set_title(title)
-        if projection == '3d':
-            if zlim is not None: ax.set_zlim(zlim)
-            if zlabel is not None: ax.set_zlabel(zlabel)
+        if xlim is not None:
+            ax.set_xlim(xlim)
+        if ylim is not None:
+            ax.set_ylim(ylim)
+        if xlabel is not None:
+            ax.set_xlabel(xlabel)
+        if ylabel is not None:
+            ax.set_ylabel(ylabel)
+        if title is not None:
+            ax.set_title(title)
+        if projection == "3d":
+            if zlim is not None:
+                ax.set_zlim(zlim)
+            if zlabel is not None:
+                ax.set_zlabel(zlabel)
         return ax, kwargs
 
     def add_to_canvas(self, ax, plots, legend=False, title=None, **kwargs):
-        #ax.autoscale_view()
-        fontdict=dict(family='sans-serif', weight='light', size=9)
+        # ax.autoscale_view()
+        fontdict = dict(family="sans-serif", weight="light", size=9)
         if legend is True:
             ax.legend(*ax.get_legend_handles_labels())
         elif legend >= 1:
-            #ax.legend(prop=fontdict)
+            # ax.legend(prop=fontdict)
             legend_ontop(ax, ncol=legend, fontdict=fontdict)
-        if title is not None: ax.figure.suptitle(title)
+        if title is not None:
+            ax.figure.suptitle(title)
         return plots
 
     def show_canvas(self, ax, **kwargs):
         ax.figure.canvas.draw()
         return ax.figure
 
-    def scatter(self, ax, X, Y, Z=None, color=Tango.colorsHex['mediumBlue'], label=None, marker='o', **kwargs):
+    def scatter(
+        self,
+        ax,
+        X,
+        Y,
+        Z=None,
+        color=Tango.colorsHex["mediumBlue"],
+        label=None,
+        marker="o",
+        **kwargs
+    ):
         if Z is not None:
             return ax.scatter(X, Y, c=color, zs=Z, label=label, marker=marker, **kwargs)
         return ax.scatter(X, Y, c=color, label=label, marker=marker, **kwargs)
@@ -106,129 +139,258 @@ class MatplotlibPlots(AbstractPlottingLibrary):
             return ax.plot(X, Y, color=color, zs=Z, label=label, **kwargs)
         return ax.plot(X, Y, color=color, label=label, **kwargs)
 
-    def plot_axis_lines(self, ax, X, color=Tango.colorsHex['darkRed'], label=None, **kwargs):
+    def plot_axis_lines(
+        self, ax, X, color=Tango.colorsHex["darkRed"], label=None, **kwargs
+    ):
         from matplotlib import transforms
         from matplotlib.path import Path
-        if 'marker' not in kwargs:
-            kwargs['marker'] = Path([[-.2,0.],    [-.2,.5],    [0.,1.],    [.2,.5],     [.2,0.],     [-.2,0.]],
-                                    [Path.MOVETO, Path.LINETO, Path.LINETO, Path.LINETO, Path.LINETO, Path.CLOSEPOLY])
-        if 'transform' not in kwargs:
+
+        if "marker" not in kwargs:
+            kwargs["marker"] = Path(
+                [
+                    [-0.2, 0.0],
+                    [-0.2, 0.5],
+                    [0.0, 1.0],
+                    [0.2, 0.5],
+                    [0.2, 0.0],
+                    [-0.2, 0.0],
+                ],
+                [
+                    Path.MOVETO,
+                    Path.LINETO,
+                    Path.LINETO,
+                    Path.LINETO,
+                    Path.LINETO,
+                    Path.CLOSEPOLY,
+                ],
+            )
+        if "transform" not in kwargs:
             if X.shape[1] == 1:
-                kwargs['transform'] = transforms.blended_transform_factory(ax.transData, ax.transAxes)
+                kwargs["transform"] = transforms.blended_transform_factory(
+                    ax.transData, ax.transAxes
+                )
         if X.shape[1] == 2:
-            return ax.scatter(X[:,0], X[:,1], ax.get_zlim()[0], c=color, label=label, **kwargs)
+            return ax.scatter(
+                X[:, 0], X[:, 1], ax.get_zlim()[0], c=color, label=label, **kwargs
+            )
         return ax.scatter(X, np.zeros_like(X), c=color, label=label, **kwargs)
 
-    def barplot(self, ax, x, height, width=0.8, bottom=0, color=Tango.colorsHex['mediumBlue'], label=None, **kwargs):
-        if 'align' not in kwargs:
-            kwargs['align'] = 'center'
-        return ax.bar(x=x, height=height, width=width,
-               bottom=bottom, label=label, color=color,
-               **kwargs)
+    def barplot(
+        self,
+        ax,
+        x,
+        height,
+        width=0.8,
+        bottom=0,
+        color=Tango.colorsHex["mediumBlue"],
+        label=None,
+        **kwargs
+    ):
+        if "align" not in kwargs:
+            kwargs["align"] = "center"
+        return ax.bar(
+            x=x,
+            height=height,
+            width=width,
+            bottom=bottom,
+            label=label,
+            color=color,
+            **kwargs
+        )
 
-    def xerrorbar(self, ax, X, Y, error, color=Tango.colorsHex['darkRed'], label=None, **kwargs):
-        if not('linestyle' in kwargs or 'ls' in kwargs):
-            kwargs['ls'] = 'none'
-        #if Z is not None:
+    def xerrorbar(
+        self, ax, X, Y, error, color=Tango.colorsHex["darkRed"], label=None, **kwargs
+    ):
+        if not ("linestyle" in kwargs or "ls" in kwargs):
+            kwargs["ls"] = "none"
+        # if Z is not None:
         #    return ax.errorbar(X, Y, Z, xerr=error, ecolor=color, label=label, **kwargs)
         return ax.errorbar(X, Y, xerr=error, ecolor=color, label=label, **kwargs)
 
-    def yerrorbar(self, ax, X, Y, error, color=Tango.colorsHex['darkRed'], label=None, **kwargs):
-        if not('linestyle' in kwargs or 'ls' in kwargs):
-            kwargs['ls'] = 'none'
-        #if Z is not None:
+    def yerrorbar(
+        self, ax, X, Y, error, color=Tango.colorsHex["darkRed"], label=None, **kwargs
+    ):
+        if not ("linestyle" in kwargs or "ls" in kwargs):
+            kwargs["ls"] = "none"
+        # if Z is not None:
         #    return ax.errorbar(X, Y, Z, yerr=error, ecolor=color, label=label, **kwargs)
         return ax.errorbar(X, Y, yerr=error, ecolor=color, label=label, **kwargs)
 
-    def imshow(self, ax, X, extent=None, label=None, vmin=None, vmax=None, **imshow_kwargs):
-        if 'origin' not in imshow_kwargs:
-            imshow_kwargs['origin'] = 'lower'
-        #xmin, xmax, ymin, ymax = extent
-        #xoffset, yoffset = (xmax - xmin) / (2. * X.shape[0]), (ymax - ymin) / (2. * X.shape[1])
-        #xmin, xmax, ymin, ymax = extent = xmin-xoffset, xmax+xoffset, ymin-yoffset, ymax+yoffset
-        return ax.imshow(X, label=label, extent=extent, vmin=vmin, vmax=vmax, **imshow_kwargs)
+    def imshow(
+        self, ax, X, extent=None, label=None, vmin=None, vmax=None, **imshow_kwargs
+    ):
+        if "origin" not in imshow_kwargs:
+            imshow_kwargs["origin"] = "lower"
+        # xmin, xmax, ymin, ymax = extent
+        # xoffset, yoffset = (xmax - xmin) / (2. * X.shape[0]), (ymax - ymin) / (2. * X.shape[1])
+        # xmin, xmax, ymin, ymax = extent = xmin-xoffset, xmax+xoffset, ymin-yoffset, ymax+yoffset
+        return ax.imshow(
+            X, label=label, extent=extent, vmin=vmin, vmax=vmax, **imshow_kwargs
+        )
 
-    def imshow_interact(self, ax, plot_function, extent, label=None, resolution=None, vmin=None, vmax=None, **imshow_kwargs):
-        if imshow_kwargs is None: imshow_kwargs = {}
-        if 'origin' not in imshow_kwargs:
-            imshow_kwargs['origin'] = 'lower'
-        return ImshowController(ax, plot_function, extent, resolution=resolution, vmin=vmin, vmax=vmax, **imshow_kwargs)
+    def imshow_interact(
+        self,
+        ax,
+        plot_function,
+        extent,
+        label=None,
+        resolution=None,
+        vmin=None,
+        vmax=None,
+        **imshow_kwargs
+    ):
+        if imshow_kwargs is None:
+            imshow_kwargs = {}
+        if "origin" not in imshow_kwargs:
+            imshow_kwargs["origin"] = "lower"
+        return ImshowController(
+            ax,
+            plot_function,
+            extent,
+            resolution=resolution,
+            vmin=vmin,
+            vmax=vmax,
+            **imshow_kwargs
+        )
 
-    def annotation_heatmap(self, ax, X, annotation, extent=None, label=None, imshow_kwargs=None, **annotation_kwargs):
-        if imshow_kwargs is None: imshow_kwargs = {}
-        if 'origin' not in imshow_kwargs:
-            imshow_kwargs['origin'] = 'lower'
-        if ('ha' not in annotation_kwargs) and ('horizontalalignment' not in annotation_kwargs):
-            annotation_kwargs['ha'] = 'center'
-        if ('va' not in annotation_kwargs) and ('verticalalignment' not in annotation_kwargs):
-            annotation_kwargs['va'] = 'center'
+    def annotation_heatmap(
+        self,
+        ax,
+        X,
+        annotation,
+        extent=None,
+        label=None,
+        imshow_kwargs=None,
+        **annotation_kwargs
+    ):
+        if imshow_kwargs is None:
+            imshow_kwargs = {}
+        if "origin" not in imshow_kwargs:
+            imshow_kwargs["origin"] = "lower"
+        if ("ha" not in annotation_kwargs) and (
+            "horizontalalignment" not in annotation_kwargs
+        ):
+            annotation_kwargs["ha"] = "center"
+        if ("va" not in annotation_kwargs) and (
+            "verticalalignment" not in annotation_kwargs
+        ):
+            annotation_kwargs["va"] = "center"
         imshow = self.imshow(ax, X, extent, label, **imshow_kwargs)
         if extent is None:
             extent = (0, X.shape[0], 0, X.shape[1])
         xmin, xmax, ymin, ymax = extent
-        xoffset, yoffset = (xmax - xmin) / (2. * X.shape[0]), (ymax - ymin) / (2. * X.shape[1])
+        xoffset, yoffset = (xmax - xmin) / (2.0 * X.shape[0]), (ymax - ymin) / (
+            2.0 * X.shape[1]
+        )
         xlin = np.linspace(xmin, xmax, X.shape[0], endpoint=False)
         ylin = np.linspace(ymin, ymax, X.shape[1], endpoint=False)
         annotations = []
         for [i, x], [j, y] in itertools.product(enumerate(xlin), enumerate(ylin)):
-            annotations.append(ax.text(x+xoffset, y+yoffset, "{}".format(annotation[j, i]), **annotation_kwargs))
+            annotations.append(
+                ax.text(
+                    x + xoffset,
+                    y + yoffset,
+                    "{}".format(annotation[j, i]),
+                    **annotation_kwargs
+                )
+            )
         return imshow, annotations
 
-    def annotation_heatmap_interact(self, ax, plot_function, extent, label=None, resolution=15, imshow_kwargs=None, **annotation_kwargs):
-        if imshow_kwargs is None: imshow_kwargs = {}
-        if 'origin' not in imshow_kwargs:
-            imshow_kwargs['origin'] = 'lower'
-        return ImAnnotateController(ax, plot_function, extent, resolution=resolution, imshow_kwargs=imshow_kwargs or {}, **annotation_kwargs)
+    def annotation_heatmap_interact(
+        self,
+        ax,
+        plot_function,
+        extent,
+        label=None,
+        resolution=15,
+        imshow_kwargs=None,
+        **annotation_kwargs
+    ):
+        if imshow_kwargs is None:
+            imshow_kwargs = {}
+        if "origin" not in imshow_kwargs:
+            imshow_kwargs["origin"] = "lower"
+        return ImAnnotateController(
+            ax,
+            plot_function,
+            extent,
+            resolution=resolution,
+            imshow_kwargs=imshow_kwargs or {},
+            **annotation_kwargs
+        )
 
     def contour(self, ax, X, Y, C, levels=20, label=None, **kwargs):
-        return ax.contour(X, Y, C, levels=np.linspace(C.min(), C.max(), levels), label=label, **kwargs)
+        return ax.contour(
+            X, Y, C, levels=np.linspace(C.min(), C.max(), levels), label=label, **kwargs
+        )
 
     def surface(self, ax, X, Y, Z, color=None, label=None, **kwargs):
         return ax.plot_surface(X, Y, Z, label=label, **kwargs)
 
-    def fill_between(self, ax, X, lower, upper, color=Tango.colorsHex['mediumBlue'], label=None, **kwargs):
+    def fill_between(
+        self,
+        ax,
+        X,
+        lower,
+        upper,
+        color=Tango.colorsHex["mediumBlue"],
+        label=None,
+        **kwargs
+    ):
         return ax.fill_between(X, lower, upper, facecolor=color, label=label, **kwargs)
 
-    def fill_gradient(self, canvas, X, percentiles, color=Tango.colorsHex['mediumBlue'], label=None, **kwargs):
+    def fill_gradient(
+        self,
+        canvas,
+        X,
+        percentiles,
+        color=Tango.colorsHex["mediumBlue"],
+        label=None,
+        **kwargs
+    ):
         ax = canvas
         plots = []
 
-        if 'edgecolors' not in kwargs:
-            kwargs['edgecolors'] = 'none'
+        if "edgecolors" not in kwargs:
+            kwargs["edgecolors"] = "none"
 
-        if 'facecolors' in kwargs:
-            color = kwargs.pop('facecolors')
+        if "facecolors" in kwargs:
+            color = kwargs.pop("facecolors")
 
-        if 'array' in kwargs:
-            array = kwargs.pop('array')
+        if "array" in kwargs:
+            array = kwargs.pop("array")
         else:
-            array = 1.-np.abs(np.linspace(-.97, .97, len(percentiles)-1))
+            array = 1.0 - np.abs(np.linspace(-0.97, 0.97, len(percentiles) - 1))
 
-        if 'alpha' in kwargs:
-            alpha = kwargs.pop('alpha')
+        if "alpha" in kwargs:
+            alpha = kwargs.pop("alpha")
         else:
-            alpha = .8
+            alpha = 0.8
 
-        if 'cmap' in kwargs:
-            cmap = kwargs.pop('cmap')
+        if "cmap" in kwargs:
+            cmap = kwargs.pop("cmap")
         else:
-            cmap = LinearSegmentedColormap.from_list('WhToColor', (color, color), N=array.size)
+            cmap = LinearSegmentedColormap.from_list(
+                "WhToColor", (color, color), N=array.size
+            )
         cmap._init()
-        cmap._lut[:-3, -1] = alpha*array
+        cmap._lut[:-3, -1] = alpha * array
 
-        kwargs['facecolors'] = [cmap(i) for i in np.linspace(0,1,cmap.N)]
+        kwargs["facecolors"] = [cmap(i) for i in np.linspace(0, 1, cmap.N)]
 
         # pop where from kwargs
-        where = kwargs.pop('where') if 'where' in kwargs else None
+        where = kwargs.pop("where") if "where" in kwargs else None
         # pop interpolate, which we actually do not do here!
-        if 'interpolate' in kwargs: kwargs.pop('interpolate')
+        if "interpolate" in kwargs:
+            kwargs.pop("interpolate")
 
         def pairwise(iterable):
             "s -> (s0,s1), (s1,s2), (s2, s3), ..."
             from itertools import tee
-            #try:
+
+            # try:
             #    from itertools import izip as zip
-            #except ImportError:
+            # except ImportError:
             #    pass
             a, b = tee(iterable)
             next(b, None)
@@ -245,6 +407,7 @@ class MatplotlibPlots(AbstractPlottingLibrary):
             ax._process_unit_info(ydata=y2)
             # Convert the arrays so we can work with them
             from numpy import ma
+
             x = ma.masked_invalid(ax.convert_xunits(X))
             y1 = ma.masked_invalid(ax.convert_yunits(y1))
             y2 = ma.masked_invalid(ax.convert_yunits(y2))
@@ -263,6 +426,7 @@ class MatplotlibPlots(AbstractPlottingLibrary):
                 raise ValueError("Argument dimensions are incompatible")
 
             from functools import reduce
+
             mask = reduce(ma.mask_or, [ma.getmask(a) for a in (x, y1, y2)])
             if mask is not ma.nomask:
                 where &= ~mask
@@ -277,7 +441,7 @@ class MatplotlibPlots(AbstractPlottingLibrary):
                     continue
 
                 N = len(xslice)
-                p = np.zeros((2 * N + 2, 2), np.float)
+                p = np.zeros((2 * N + 2, 2), float)
 
                 # the purpose of the next two lines is for when y2 is a
                 # scalar like 0 and we want the fill to go all the way
@@ -288,16 +452,17 @@ class MatplotlibPlots(AbstractPlottingLibrary):
                 p[0] = start
                 p[N + 1] = end
 
-                p[1:N + 1, 0] = xslice
-                p[1:N + 1, 1] = y1slice
-                p[N + 2:, 0] = xslice[::-1]
-                p[N + 2:, 1] = y2slice[::-1]
+                p[1 : N + 1, 0] = xslice
+                p[1 : N + 1, 1] = y1slice
+                p[N + 2 :, 0] = xslice[::-1]
+                p[N + 2 :, 1] = y2slice[::-1]
 
                 polys.append(p)
             polycol.extend(polys)
         from matplotlib.collections import PolyCollection
-        if 'zorder' not in kwargs:
-            kwargs['zorder'] = 0
+
+        if "zorder" not in kwargs:
+            kwargs["zorder"] = 0
         plots.append(PolyCollection(polycol, label=label, **kwargs))
         ax.add_collection(plots[-1], autolim=True)
         ax.autoscale_view()
diff --git a/GPy/testing/test_ep_likelihood.py b/GPy/testing/test_ep_likelihood.py
index 2ab42617..67bea0a4 100644
--- a/GPy/testing/test_ep_likelihood.py
+++ b/GPy/testing/test_ep_likelihood.py
@@ -24,7 +24,7 @@ class TestObservationModels:
         self.Y = (np.sin(self.X[:, 0] * 2 * np.pi) + noise)[:, None]
         self.num_points = self.X.shape[0]
         self.f = np.random.rand(self.N, 1)
-        self.binary_Y = np.asarray(np.random.rand(self.N) > 0.5, dtype=np.int)[:, None]
+        self.binary_Y = np.asarray(np.random.rand(self.N) > 0.5, dtype=int)[:, None]
         # self.binary_Y[self.binary_Y == 0.0] = -1.0
         self.positive_Y = np.exp(self.Y.copy())
 
diff --git a/GPy/testing/test_likelihood.py b/GPy/testing/test_likelihood.py
index ce82b9c0..f35bd0f3 100644
--- a/GPy/testing/test_likelihood.py
+++ b/GPy/testing/test_likelihood.py
@@ -136,7 +136,7 @@ class TestNoiseModels:
         noise = np.random.randn(*self.X[:, 0].shape) * self.real_std
         self.Y = (np.sin(self.X[:, 0] * 2 * np.pi) + noise)[:, None]
         self.f = np.random.rand(self.N, 1)
-        self.binary_Y = np.asarray(np.random.rand(self.N) > 0.5, dtype=np.int)[:, None]
+        self.binary_Y = np.asarray(np.random.rand(self.N) > 0.5, dtype=int)[:, None]
         self.binary_Y[self.binary_Y == 0.0] = -1.0
         self.positive_Y = np.exp(self.Y.copy())
         tmp = (
diff --git a/GPy/testing/test_model.py b/GPy/testing/test_model.py
index 44b2c0a6..f78885af 100644
--- a/GPy/testing/test_model.py
+++ b/GPy/testing/test_model.py
@@ -1432,8 +1432,8 @@ class TestGradient:
         y = np.zeros((D * N_train,))
         x_test = np.zeros((D * (N - N_train),))
         y_test = np.zeros((D * (N - N_train),))
-        indexD = np.zeros((D * N_train), dtype=np.int)
-        indexD_test = np.zeros((D * (N - N_train)), dtype=np.int)
+        indexD = np.zeros((D * N_train), dtype=int)
+        indexD_test = np.zeros((D * (N - N_train)), dtype=int)
 
         offset_all = 0
         offset_train = 0
diff --git a/GPy/testing/test_pickle.py b/GPy/testing/test_pickle.py
index dea50889..6783336f 100644
--- a/GPy/testing/test_pickle.py
+++ b/GPy/testing/test_pickle.py
@@ -53,7 +53,7 @@ class TestPickleSupport(ListDictTestCase):
         assert par.param_array.tolist() == pcopy.param_array.tolist()
         np.testing.assert_allclose(par.gradient_full, pcopy.gradient_full)
         assert str(par) == str(pcopy)
-        assert par.param_array != pcopy.param_array
+        assert par.param_array is not pcopy.param_array  # values are equal (checked above); the copy must be a distinct array object
         assert par.gradient_full != pcopy.gradient_full
         assert pcopy.checkgrad()
         assert np.any(pcopy.gradient != 0.0)
@@ -72,7 +72,7 @@ class TestPickleSupport(ListDictTestCase):
         np.testing.assert_allclose(par.param_array, pcopy.param_array)
         np.testing.assert_allclose(par.gradient_full, pcopy.gradient_full)
         assert str(par) == str(pcopy)
-        assert par.param_array != pcopy.param_array
+        assert par.param_array is not pcopy.param_array  # values are equal (checked above); the copy must be a distinct array object
         assert par.gradient_full != pcopy.gradient_full
         assert pcopy.checkgrad()
         assert np.any(pcopy.gradient != 0.0)
@@ -97,7 +97,7 @@ class TestPickleSupport(ListDictTestCase):
         assert par.param_array.tolist() == pcopy.param_array.tolist()
         assert par.gradient_full.tolist() == pcopy.gradient_full.tolist()
         assert str(par) == str(pcopy)
-        assert par.param_array != pcopy.param_array
+        assert par.param_array is not pcopy.param_array  # values are equal (checked above); the copy must be a distinct array object
         assert par.gradient_full != pcopy.gradient_full
         with tempfile.TemporaryFile("w+b") as f:
             par.pickle(f)
@@ -116,7 +116,7 @@ class TestPickleSupport(ListDictTestCase):
         assert par.param_array.tolist() == pcopy.param_array.tolist()
         assert par.gradient_full.tolist() == pcopy.gradient_full.tolist()
         assert str(par) == str(pcopy)
-        assert par.param_array != pcopy.param_array
+        assert par.param_array is not pcopy.param_array  # values are equal (checked above); the copy must be a distinct array object
         assert par.gradient_full != pcopy.gradient_full
         assert par.checkgrad()
         assert pcopy.checkgrad()
diff --git a/GPy/util/classification.py b/GPy/util/classification.py
index 69609091..bb321729 100644
--- a/GPy/util/classification.py
+++ b/GPy/util/classification.py
@@ -2,7 +2,8 @@
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
 import numpy as np
 
-def conf_matrix(p,labels,names=['1','0'],threshold=.5,show=True):
+
+def conf_matrix(p, labels, names=["1", "0"], threshold=0.5, show=True):
     """
     Returns error rate and true/false positives in a binary classification problem
     - Actual classes are displayed by column.
@@ -16,18 +17,18 @@ def conf_matrix(p,labels,names=['1','0'],threshold=.5,show=True):
     :type show: False|True
     """
     assert p.size == labels.size, "Arrays p and labels have different dimensions."
-    decision = np.ones((labels.size,1))
-    decision[p<threshold] = 0
+    decision = np.ones((labels.size, 1))
+    decision[p < threshold] = 0
     diff = decision - labels
     false_0 = diff[diff == -1].size
     false_1 = diff[diff == 1].size
-    true_1 = np.sum(decision[diff ==0])
+    true_1 = np.sum(decision[diff == 0])
     true_0 = labels.size - true_1 - false_0 - false_1
-    error = (false_1 + false_0)/np.float(labels.size)
+    error = (false_1 + false_0) / float(labels.size)
     if show:
-        print(100. - error * 100,'% instances correctly classified')
-        print('%-10s|  %-10s|  %-10s| ' % ('',names[0],names[1]))
-        print('----------|------------|------------|')
-        print('%-10s|  %-10s|  %-10s| ' % (names[0],true_1,false_0))
-        print('%-10s|  %-10s|  %-10s| ' % (names[1],false_1,true_0))
-    return error,true_1, false_1, true_0, false_0
+        print(100.0 - error * 100, "% instances correctly classified")
+        print("%-10s|  %-10s|  %-10s| " % ("", names[0], names[1]))
+        print("----------|------------|------------|")
+        print("%-10s|  %-10s|  %-10s| " % (names[0], true_1, false_0))
+        print("%-10s|  %-10s|  %-10s| " % (names[1], false_1, true_0))
+    return error, true_1, false_1, true_0, false_0
diff --git a/GPy/util/multioutput.py b/GPy/util/multioutput.py
index 91227838..ebdc27f1 100644
--- a/GPy/util/multioutput.py
+++ b/GPy/util/multioutput.py
@@ -2,6 +2,7 @@ import numpy as np
 import warnings
 import GPy
 
+
 def index_to_slices(index):
     """
     take a numpy array of integers (index) and return a  nested list of slices such that the slices describe the start, stop points for each integer in the index.
@@ -16,28 +17,35 @@ def index_to_slices(index):
     returns
     >>> [[slice(0,2,None),slice(4,5,None)],[slice(2,4,None),slice(8,10,None)],[slice(5,8,None)]]
     """
-    if len(index)==0:
-        return[]
+    if len(index) == 0:
+        return []
 
-    #contruct the return structure
-    ind = np.asarray(index,dtype=np.int)
-    ret = [[] for i in range(ind.max()+1)]
+    # construct the return structure
+    ind = np.asarray(index, dtype=int)
+    ret = [[] for i in range(ind.max() + 1)]
 
-    #find the switchpoints
-    ind_ = np.hstack((ind,ind[0]+ind[-1]+1))
-    switchpoints = np.nonzero(ind_ - np.roll(ind_,+1))[0]
+    # find the switchpoints
+    ind_ = np.hstack((ind, ind[0] + ind[-1] + 1))
+    switchpoints = np.nonzero(ind_ - np.roll(ind_, +1))[0]
 
-    [ret[ind_i].append(slice(*indexes_i)) for ind_i,indexes_i in zip(ind[switchpoints[:-1]],zip(switchpoints,switchpoints[1:]))]
+    [
+        ret[ind_i].append(slice(*indexes_i))
+        for ind_i, indexes_i in zip(
+            ind[switchpoints[:-1]], zip(switchpoints, switchpoints[1:])
+        )
+    ]
     return ret
 
+
 def get_slices(input_list):
     num_outputs = len(input_list)
-    _s = [0] + [ _x.shape[0] for _x in input_list ]
+    _s = [0] + [_x.shape[0] for _x in input_list]
     _s = np.cumsum(_s)
-    slices = [slice(a,b) for a,b in zip(_s[:-1],_s[1:])]
+    slices = [slice(a, b) for a, b in zip(_s[:-1], _s[1:])]
     return slices
 
-def build_XY(input_list,output_list=None,index=None):
+
+def build_XY(input_list, output_list=None, index=None):
     num_outputs = len(input_list)
     if output_list is not None:
         assert num_outputs == len(output_list)
@@ -47,27 +55,35 @@ def build_XY(input_list,output_list=None,index=None):
 
     if index is not None:
         assert len(index) == num_outputs
-        I = np.hstack( [np.repeat(j,_x.shape[0]) for _x,j in zip(input_list,index)] )
+        I = np.hstack([np.repeat(j, _x.shape[0]) for _x, j in zip(input_list, index)])
     else:
-        I = np.hstack( [np.repeat(j,_x.shape[0]) for _x,j in zip(input_list,range(num_outputs))] )
+        I = np.hstack(
+            [np.repeat(j, _x.shape[0]) for _x, j in zip(input_list, range(num_outputs))]
+        )
 
     X = np.vstack(input_list)
-    X = np.hstack([X,I[:,None]])
+    X = np.hstack([X, I[:, None]])
 
-    return X,Y,I[:,None]#slices
+    return X, Y, I[:, None]  # slices
 
-def build_likelihood(Y_list,noise_index,likelihoods_list=None):
+
+def build_likelihood(Y_list, noise_index, likelihoods_list=None):
     Ny = len(Y_list)
     if likelihoods_list is None:
-       likelihoods_list = [GPy.likelihoods.Gaussian(name="Gaussian_noise_%s" %j) for y,j in zip(Y_list,range(Ny))]
+        likelihoods_list = [
+            GPy.likelihoods.Gaussian(name="Gaussian_noise_%s" % j)
+            for y, j in zip(Y_list, range(Ny))
+        ]
     else:
         assert len(likelihoods_list) == Ny
-    #likelihood = GPy.likelihoods.mixed_noise.MixedNoise(likelihoods_list=likelihoods_list, noise_index=noise_index)
-    likelihood = GPy.likelihoods.mixed_noise.MixedNoise(likelihoods_list=likelihoods_list)
+    # likelihood = GPy.likelihoods.mixed_noise.MixedNoise(likelihoods_list=likelihoods_list, noise_index=noise_index)
+    likelihood = GPy.likelihoods.mixed_noise.MixedNoise(
+        likelihoods_list=likelihoods_list
+    )
     return likelihood
 
 
-def ICM(input_dim, num_outputs, kernel, W_rank=1,W=None,kappa=None,name='ICM'):
+def ICM(input_dim, num_outputs, kernel, W_rank=1, W=None, kappa=None, name="ICM"):
     """
     Builds a kernel for an Intrinsic Coregionalization Model
 
@@ -80,13 +96,26 @@ def ICM(input_dim, num_outputs, kernel, W_rank=1,W=None,kappa=None,name='ICM'):
     """
     if kernel.input_dim != input_dim:
         kernel.input_dim = input_dim
-        warnings.warn("kernel's input dimension overwritten to fit input_dim parameter.")
+        warnings.warn(
+            "kernel's input dimension overwritten to fit input_dim parameter."
+        )
 
-    K = kernel.prod(GPy.kern.Coregionalize(1, num_outputs, active_dims=[input_dim], rank=W_rank,W=W,kappa=kappa,name='B'),name=name)
+    K = kernel.prod(
+        GPy.kern.Coregionalize(
+            1,
+            num_outputs,
+            active_dims=[input_dim],
+            rank=W_rank,
+            W=W,
+            kappa=kappa,
+            name="B",
+        ),
+        name=name,
+    )
     return K
 
 
-def LCM(input_dim, num_outputs, kernels_list, W_rank=1,name='ICM'):
+def LCM(input_dim, num_outputs, kernels_list, W_rank=1, name="ICM"):
     """
     Builds a kernel for an Linear Coregionalization Model
 
@@ -98,15 +127,15 @@ def LCM(input_dim, num_outputs, kernels_list, W_rank=1,name='ICM'):
     :type W_rank: integer
     """
     Nk = len(kernels_list)
-    K = ICM(input_dim,num_outputs,kernels_list[0],W_rank,name='%s%s' %(name,0))
+    K = ICM(input_dim, num_outputs, kernels_list[0], W_rank, name="%s%s" % (name, 0))
     j = 1
     for kernel in kernels_list[1:]:
-        K += ICM(input_dim,num_outputs,kernel,W_rank,name='%s%s' %(name,j))
+        K += ICM(input_dim, num_outputs, kernel, W_rank, name="%s%s" % (name, j))
         j += 1
     return K
 
 
-def Private(input_dim, num_outputs, kernel, output, kappa=None,name='X'):
+def Private(input_dim, num_outputs, kernel, output, kappa=None, name="X"):
     """
     Builds a kernel for an Intrinsic Coregionalization Model
 
@@ -117,7 +146,7 @@ def Private(input_dim, num_outputs, kernel, output, kappa=None,name='X'):
     :param W_rank: number tuples of the corregionalization parameters 'W'
     :type W_rank: integer
     """
-    K = ICM(input_dim,num_outputs,kernel,W_rank=1,kappa=kappa,name=name)
+    K = ICM(input_dim, num_outputs, kernel, W_rank=1, kappa=kappa, name=name)
     K.B.W.fix(0)
     _range = range(num_outputs)
     _range.pop(output)