Escape more strings

2026-07-23 17:01:06 +02:00 · 2024-10-26 09:36:11 +02:00 · 2024-10-26 09:36:11 +02:00 · 12d4c22ca7
commit 12d4c22ca7
parent 4fa858ee6d
21 changed files with 194 additions and 194 deletions
--- a/GPy/core/gp.py
+++ b/GPy/core/gp.py
@ -194,7 +194,7 @@ class GP(Model):
    # Make sure to name this variable and the predict functions will "just work"
    # In maths the predictive variable is:
    #         K_{xx} - K_{xp}W_{pp}^{-1}K_{px}
-    #         W_{pp} := \texttt{Woodbury inv}
+    #         W_{pp} := \\texttt{Woodbury inv}
    #         p := _predictive_variable

    @property
@ -298,7 +298,7 @@ class GP(Model):
        .. math::
            p(f*|X*, X, Y) = \\int^{\\inf}_{\\inf} p(f*|f,X*)p(f|X,Y) df
                        = N(f*| K_{x*x}(K_{xx} + \\Sigma)^{-1}Y, K_{x*x*} - K_{xx*}(K_{xx} + \\Sigma)^{-1}K_{xx*}
-            \\Sigma := \texttt{Likelihood.variance / Approximate likelihood covariance}
+            \\Sigma := \\texttt{Likelihood.variance / Approximate likelihood covariance}
        """
        mu, var = self.posterior._raw_predict(kern=self.kern if kern is None else kern, Xnew=Xnew, pred_var=self._predictive_variable, full_cov=full_cov)
        if self.mean_function is not None:
--- a/GPy/inference/latent_function_inference/expectation_propagation.py
+++ b/GPy/inference/latent_function_inference/expectation_propagation.py
@ -134,10 +134,10 @@ class posteriorParams(posteriorParamsBase):
        B = np.eye(num_data) + Sroot_tilde_K * tau_tilde_root[None,:]
        L = jitchol(B)
        V, _ = dtrtrs(L, Sroot_tilde_K, lower=1)
-        Sigma = K - np.dot(V.T,V) #K - KS^(1/2)BS^(1/2)K = (K^(-1) + \Sigma^(-1))^(-1)
+        Sigma = K - np.dot(V.T,V) #K - KS^(1/2)BS^(1/2)K = (K^(-1) + \\Sigma^(-1))^(-1)

        aux_alpha , _ = dpotrs(L, tau_tilde_root * (np.dot(K, ga_approx.v) + mean_prior), lower=1)
-        alpha = ga_approx.v - tau_tilde_root * aux_alpha #(K + Sigma^(\tilde))^(-1) (/mu^(/tilde) - /mu_p)
+        alpha = ga_approx.v - tau_tilde_root * aux_alpha #(K + Sigma^(\\tilde))^(-1) (/mu^(/tilde) - /mu_p)
        mu = np.dot(K, alpha) + mean_prior

        return posteriorParams(mu=mu, Sigma=Sigma, L=L)
@ -151,8 +151,8 @@ class posteriorParamsDTC(posteriorParamsBase):
        DSYR(LLT,Kmn[:,i].copy(),delta_tau)
        L = jitchol(LLT)
        V,info = dtrtrs(L,Kmn,lower=1)
-        self.Sigma_diag = np.maximum(np.sum(V*V,-2), np.finfo(float).eps)  #diag(K_nm (L L^\top)^(-1)) K_mn
-        si = np.sum(V.T*V[:,i],-1) #(V V^\top)[:,i]
+        self.Sigma_diag = np.maximum(np.sum(V*V,-2), np.finfo(float).eps)  #diag(K_nm (L L^\\top)^(-1)) K_mn
+        si = np.sum(V.T*V[:,i],-1) #(V V^\\top)[:,i]
        self.mu += (delta_v-delta_tau*self.mu[i])*si
        #mu = np.dot(Sigma, v_tilde)

@ -391,11 +391,11 @@ class EP(EPBase, ExactGaussianInference):


        aux_alpha , _ = dpotrs(post_params.L, tau_tilde_root * (np.dot(K, ga_approx.v) +  mean_prior), lower=1)
-        alpha = (ga_approx.v - tau_tilde_root * aux_alpha)[:,None] #(K + Sigma^(\tilde))^(-1) (/mu^(/tilde) -  /mu_p)
+        alpha = (ga_approx.v - tau_tilde_root * aux_alpha)[:,None] #(K + Sigma^(\\tilde))^(-1) (/mu^(/tilde) -  /mu_p)

        LWi, _ = dtrtrs(post_params.L, np.diag(tau_tilde_root), lower=1)
        Wi = np.dot(LWi.T,LWi)
-        symmetrify(Wi) #(K + Sigma^(\tilde))^(-1)
+        symmetrify(Wi) #(K + Sigma^(\\tilde))^(-1)

        dL_dK = 0.5 * (tdot(alpha) - Wi)
        dL_dthetaL = likelihood.ep_gradients(Y, cav_params.tau, cav_params.v, np.diag(dL_dK), Y_metadata=Y_metadata, quad_mode='gh')
@ -530,7 +530,7 @@ class EPDTC(EPBase, VarDTC):
        #initial values - Gaussian factors
        #Initial values - Posterior distribution parameters: q(f|X,Y) = N(f|mu,Sigma)
        LLT0 = Kmm.copy()
-        Lm = jitchol(LLT0) #K_m = L_m L_m^\top
+        Lm = jitchol(LLT0) #K_m = L_m L_m^\\top
        Vm,info = dtrtrs(Lm, Kmn,lower=1)
        # Lmi = dtrtri(Lm)
        # Kmmi = np.dot(Lmi.T,Lmi)
--- a/GPy/inference/latent_function_inference/pep.py
+++ b/GPy/inference/latent_function_inference/pep.py
@ -8,14 +8,14 @@ log_2_pi = np.log(2*np.pi)
 class PEP(LatentFunctionInference):
    '''
    Sparse Gaussian processes using Power-Expectation Propagation
-    for regression: alpha \approx 0 gives VarDTC and alpha = 1 gives FITC
-    
-    Reference: A Unifying Framework for Sparse Gaussian Process Approximation using 
+    for regression: alpha \\approx 0 gives VarDTC and alpha = 1 gives FITC
+
+    Reference: A Unifying Framework for Sparse Gaussian Process Approximation using
    Power Expectation Propagation, https://arxiv.org/abs/1605.07066
-    
+
    '''
    const_jitter = 1e-6
-    
+
    def __init__(self, alpha):
        super(PEP, self).__init__()
        self.alpha = alpha
@ -69,7 +69,7 @@ class PEP(LatentFunctionInference):
        #compute dL_dR
        Uv = np.dot(U, v)
        dL_dR = 0.5*(np.sum(U*np.dot(U,P), 1) - (1.0+alpha_const_term)/beta_star + np.sum(np.square(Y), 1) - 2.*np.sum(Uv*Y, 1) \
-            + np.sum(np.square(Uv), 1))*beta_star**2 
+            + np.sum(np.square(Uv), 1))*beta_star**2

        # Compute dL_dKmm
        vvT_P = tdot(v.reshape(-1,1)) + P
--- a/GPy/inference/latent_function_inference/posterior.py
+++ b/GPy/inference/latent_function_inference/posterior.py
@ -82,7 +82,7 @@ class Posterior(object):
        Posterior mean
        $$
        K_{xx}v
-        v := \texttt{Woodbury vector}
+        v := \\texttt{Woodbury vector}
        $$
        """
        if self._mean is None:
@ -95,7 +95,7 @@ class Posterior(object):
        Posterior covariance
        $$
        K_{xx} - K_{xx}W_{xx}^{-1}K_{xx}
-        W_{xx} := \texttt{Woodbury inv}
+        W_{xx} := \\texttt{Woodbury inv}
        $$
        """
        if self._covariance is None:
@ -146,8 +146,8 @@ class Posterior(object):
        """
        return $L_{W}$ where L is the lower triangular Cholesky decomposition of the Woodbury matrix
        $$
-        L_{W}L_{W}^{\top} = W^{-1}
-        W^{-1} := \texttt{Woodbury inv}
+        L_{W}L_{W}^{\\top} = W^{-1}
+        W^{-1} := \\texttt{Woodbury inv}
        $$
        """
        if self._woodbury_chol is None:
@ -179,7 +179,7 @@ class Posterior(object):
        The inverse of the woodbury matrix, in the gaussian likelihood case it is defined as
        $$
        (K_{xx} + \\Sigma_{xx})^{-1}
-        \\Sigma_{xx} := \texttt{Likelihood.variance / Approximate likelihood covariance}
+        \\Sigma_{xx} := \\texttt{Likelihood.variance / Approximate likelihood covariance}
        $$
        """
        if self._woodbury_inv is None:
@ -201,7 +201,7 @@ class Posterior(object):
        Woodbury vector in the gaussian likelihood case only is defined as
        $$
        (K_{xx} + \\Sigma)^{-1}Y
-        \\Sigma := \texttt{Likelihood.variance / Approximate likelihood covariance}
+        \\Sigma := \\texttt{Likelihood.variance / Approximate likelihood covariance}
        $$
        """
        if self._woodbury_vector is None:
--- a/GPy/kern/src/coregionalize.py
+++ b/GPy/kern/src/coregionalize.py
@ -19,18 +19,18 @@ except ImportError:


 class Coregionalize(Kern):
-    r"""
+    """
    Covariance function for intrinsic/linear coregionalization models

    This covariance has the form:

    .. math::
-       \mathbf{B} = \mathbf{W}\mathbf{W}^\intercal + \mathrm{diag}(kappa)
+       \\mathbf{B} = \\mathbf{W}\\mathbf{W}^\\intercal + \\mathrm{diag}(kappa)

    An intrinsic/linear coregionalization covariance function of the form:

    .. math::
-       k_2(x, y)=\mathbf{B} k(x, y)
+       k_2(x, y)=\\mathbf{B} k(x, y)

    it is obtained as the tensor product between a covariance function
    k(x, y) and B.
--- a/GPy/kern/src/eq_ode1.py
+++ b/GPy/kern/src/eq_ode1.py
@ -15,7 +15,7 @@ class EQ_ODE1(Kern):

    This outputs of this kernel have the form
    .. math::
-       \frac{\text{d}y_j}{\text{d}t} = \\sum_{i=1}^R w_{j,i} u_i(t-\\delta_j) - d_jy_j(t)
+       \\frac{\\text{d}y_j}{\\text{d}t} = \\sum_{i=1}^R w_{j,i} u_i(t-\\delta_j) - d_jy_j(t)

    where :math:`R` is the rank of the system, :math:`w_{j,i}` is the sensitivity of the :math:`j`th output to the :math:`i`th latent function, :math:`d_j` is the decay rate of the :math:`j`th output and :math:`u_i(t)` are independent latent Gaussian processes goverened by an exponentiated quadratic covariance.

--- a/GPy/kern/src/eq_ode2.py
+++ b/GPy/kern/src/eq_ode2.py
@ -15,7 +15,7 @@ class EQ_ODE2(Kern):

    This outputs of this kernel have the form
    .. math::
-       \frac{\text{d}^2y_j(t)}{\text{d}^2t} + C_j\frac{\text{d}y_j(t)}{\text{d}t} + B_jy_j(t) = \\sum_{i=1}^R w_{j,i} u_i(t)
+       \\frac{\\text{d}^2y_j(t)}{\\text{d}^2t} + C_j\\frac{\\text{d}y_j(t)}{\\text{d}t} + B_jy_j(t) = \\sum_{i=1}^R w_{j,i} u_i(t)

    where :math:`R` is the rank of the system, :math:`w_{j,i}` is the sensitivity of the :math:`j`th output to the :math:`i`th latent function, :math:`d_j` is the decay rate of the :math:`j`th output and :math:`f_i(t)` and :math:`g_i(t)` are independent latent Gaussian processes goverened by an exponentiated quadratic covariance.

--- a/GPy/kern/src/linear.py
+++ b/GPy/kern/src/linear.py
@ -121,7 +121,7 @@ class Linear(Kern):
        the returned array is of shape [NxNxQxQ].

        ..math:
-            \frac{\\partial^2 K}{\\partial X2 ^2} = - \frac{\\partial^2 K}{\\partial X\\partial X2}
+            \\frac{\\partial^2 K}{\\partial X2 ^2} = - \\frac{\\partial^2 K}{\\partial X\\partial X2}

        ..returns:
            dL2_dXdX2:  [NxMxQxQ] for X [NxQ] and X2[MxQ] (X2 is X if, X2 is None)
--- a/GPy/kern/src/sde_matern.py
+++ b/GPy/kern/src/sde_matern.py
@ -11,15 +11,15 @@ import numpy as np

 class sde_Matern32(Matern32):
    """
-    
+
    Class provide extra functionality to transfer this covariance function into
    SDE forrm.
-    
+
    Matern 3/2 kernel:

    .. math::

-       k(r) = \\sigma^2 (1 + \\sqrt{3} r) \\exp(- \\sqrt{3} r) \\ \\ \\ \\  \text{ where  } r = \\sqrt{\\sum_{i=1}^{input dim} \frac{(x_i-y_i)^2}{\\ell_i^2} }
+       k(r) = \\sigma^2 (1 + \\sqrt{3} r) \\exp(- \\sqrt{3} r) \\ \\ \\ \\  \\text{ where  } r = \\sqrt{\\sum_{i=1}^{input dim} \\frac{(x_i-y_i)^2}{\\ell_i^2} }

    """
    def sde_update_gradient_full(self, gradients):
@ -27,59 +27,59 @@ class sde_Matern32(Matern32):
        Update gradient in the order in which parameters are represented in the
        kernel
        """
-    
+
        self.variance.gradient = gradients[0]
        self.lengthscale.gradient = gradients[1]
-        
-    def sde(self): 
-        """ 
-        Return the state space representation of the covariance. 
-        """ 
-        
+
+    def sde(self):
+        """
+        Return the state space representation of the covariance.
+        """
+
        variance = float(self.variance.values)
        lengthscale = float(self.lengthscale.values)
-        
-        foo  = np.sqrt(3.)/lengthscale 
-        F    = np.array(((0, 1.0), (-foo**2, -2*foo))) 
+
+        foo  = np.sqrt(3.)/lengthscale
+        F    = np.array(((0, 1.0), (-foo**2, -2*foo)))
        L    = np.array(( (0,), (1.0,) ))
-        Qc   = np.array(((12.*np.sqrt(3) / lengthscale**3 * variance,),)) 
-        H    = np.array(((1.0, 0),)) 
+        Qc   = np.array(((12.*np.sqrt(3) / lengthscale**3 * variance,),))
+        H    = np.array(((1.0, 0),))
        Pinf = np.array(((variance, 0.0), (0.0, 3.*variance/(lengthscale**2))))
        P0 = Pinf.copy()
-        
-        # Allocate space for the derivatives 
+
+        # Allocate space for the derivatives
        dF    = np.empty([F.shape[0],F.shape[1],2])
-        dQc   = np.empty([Qc.shape[0],Qc.shape[1],2]) 
-        dPinf = np.empty([Pinf.shape[0],Pinf.shape[1],2]) 
-        # The partial derivatives 
-        dFvariance       = np.zeros((2,2)) 
-        dFlengthscale    = np.array(((0,0), (6./lengthscale**3,2*np.sqrt(3)/lengthscale**2))) 
-        dQcvariance      = np.array((12.*np.sqrt(3)/lengthscale**3)) 
-        dQclengthscale   = np.array((-3*12*np.sqrt(3)/lengthscale**4*variance)) 
-        dPinfvariance    = np.array(((1,0),(0,3./lengthscale**2))) 
-        dPinflengthscale = np.array(((0,0), (0,-6*variance/lengthscale**3))) 
-        # Combine the derivatives 
-        dF[:,:,0]    = dFvariance 
-        dF[:,:,1]    = dFlengthscale 
-        dQc[:,:,0]   = dQcvariance 
-        dQc[:,:,1]   = dQclengthscale 
-        dPinf[:,:,0] = dPinfvariance 
-        dPinf[:,:,1] = dPinflengthscale 
+        dQc   = np.empty([Qc.shape[0],Qc.shape[1],2])
+        dPinf = np.empty([Pinf.shape[0],Pinf.shape[1],2])
+        # The partial derivatives
+        dFvariance       = np.zeros((2,2))
+        dFlengthscale    = np.array(((0,0), (6./lengthscale**3,2*np.sqrt(3)/lengthscale**2)))
+        dQcvariance      = np.array((12.*np.sqrt(3)/lengthscale**3))
+        dQclengthscale   = np.array((-3*12*np.sqrt(3)/lengthscale**4*variance))
+        dPinfvariance    = np.array(((1,0),(0,3./lengthscale**2)))
+        dPinflengthscale = np.array(((0,0), (0,-6*variance/lengthscale**3)))
+        # Combine the derivatives
+        dF[:,:,0]    = dFvariance
+        dF[:,:,1]    = dFlengthscale
+        dQc[:,:,0]   = dQcvariance
+        dQc[:,:,1]   = dQclengthscale
+        dPinf[:,:,0] = dPinfvariance
+        dPinf[:,:,1] = dPinflengthscale
        dP0 = dPinf.copy()
-        
+
        return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)

 class sde_Matern52(Matern52):
    """
-    
+
    Class provide extra functionality to transfer this covariance function into
    SDE forrm.
-    
+
    Matern 5/2 kernel:

    .. math::

-       k(r) = \\sigma^2 (1 + \\sqrt{5} r + \frac{5}{3}r^2) \\exp(- \\sqrt{5} r) \\ \\ \\ \\  \text{ where  } r = \\sqrt{\\sum_{i=1}^{input dim} \frac{(x_i-y_i)^2}{\\ell_i^2} }
+       k(r) = \\sigma^2 (1 + \\sqrt{5} r + \\frac{5}{3}r^2) \\exp(- \\sqrt{5} r) \\ \\ \\ \\  \\text{ where  } r = \\sqrt{\\sum_{i=1}^{input dim} \\frac{(x_i-y_i)^2}{\\ell_i^2} }

    """
    def sde_update_gradient_full(self, gradients):
@ -87,51 +87,51 @@ class sde_Matern52(Matern52):
        Update gradient in the order in which parameters are represented in the
        kernel
        """
-    
+
        self.variance.gradient = gradients[0]
        self.lengthscale.gradient = gradients[1]
-        
-    def sde(self): 
-        """ 
-        Return the state space representation of the covariance. 
-        """ 
-        
+
+    def sde(self):
+        """
+        Return the state space representation of the covariance.
+        """
+
        variance = float(self.variance.values)
        lengthscale = float(self.lengthscale.values)

        lamda = np.sqrt(5.0)/lengthscale
-        kappa = 5.0/3.0*variance/lengthscale**2        
-        
+        kappa = 5.0/3.0*variance/lengthscale**2
+
        F = np.array(((0, 1,0), (0, 0, 1), (-lamda**3, -3.0*lamda**2, -3*lamda)))
        L = np.array(((0,),(0,),(1,)))
        Qc = np.array((((variance*400.0*np.sqrt(5.0)/3.0/lengthscale**5),),))
-        H = np.array(((1,0,0),))        
-        
+        H = np.array(((1,0,0),))
+
        Pinf = np.array(((variance,0,-kappa), (0, kappa, 0), (-kappa, 0, 25.0*variance/lengthscale**4)))
        P0 = Pinf.copy()
-        # Allocate space for the derivatives         
-        dF = np.empty((3,3,2))        
-        dQc = np.empty((1,1,2))        
+        # Allocate space for the derivatives
+        dF = np.empty((3,3,2))
+        dQc = np.empty((1,1,2))
        dPinf = np.empty((3,3,2))
-        
-         # The partial derivatives 
+
+         # The partial derivatives
        dFvariance = np.zeros((3,3))
-        dFlengthscale = np.array(((0,0,0),(0,0,0),(15.0*np.sqrt(5.0)/lengthscale**4, 
+        dFlengthscale = np.array(((0,0,0),(0,0,0),(15.0*np.sqrt(5.0)/lengthscale**4,
                                   30.0/lengthscale**3, 3*np.sqrt(5.0)/lengthscale**2)))
        dQcvariance = np.array((((400*np.sqrt(5)/3/lengthscale**5,),)))
-        dQclengthscale = np.array((((-variance*2000*np.sqrt(5)/3/lengthscale**6,),)))        
-        
+        dQclengthscale = np.array((((-variance*2000*np.sqrt(5)/3/lengthscale**6,),)))
+
        dPinf_variance = Pinf/variance
        kappa2 = -2.0*kappa/lengthscale
-        dPinf_lengthscale = np.array(((0,0,-kappa2),(0,kappa2,0),(-kappa2, 
-                                    0,-100*variance/lengthscale**5)))        
-        # Combine the derivatives 
+        dPinf_lengthscale = np.array(((0,0,-kappa2),(0,kappa2,0),(-kappa2,
+                                    0,-100*variance/lengthscale**5)))
+        # Combine the derivatives
        dF[:,:,0] = dFvariance
-        dF[:,:,1] = dFlengthscale        
-        dQc[:,:,0] = dQcvariance         
-        dQc[:,:,1] = dQclengthscale        
+        dF[:,:,1] = dFlengthscale
+        dQc[:,:,0] = dQcvariance
+        dQc[:,:,1] = dQclengthscale
        dPinf[:,:,0] = dPinf_variance
        dPinf[:,:,1] = dPinf_lengthscale
        dP0 = dPinf.copy()
-        
-        return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)  
+
+        return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)
--- a/GPy/kern/src/sde_standard_periodic.py
+++ b/GPy/kern/src/sde_standard_periodic.py
@ -24,8 +24,8 @@ class sde_StdPeriodic(StdPeriodic):

    .. math::

-       k(x,y) = \theta_1 \\exp \\left[  - \frac{1}{2} {}\\sum_{i=1}^{input\\_dim}
-       \\left( \frac{\\sin(\frac{\\pi}{\\lambda_i} (x_i - y_i) )}{l_i} \right)^2 \right] }
+       k(x,y) = \\theta_1 \\exp \\left[  - \\frac{1}{2} {}\\sum_{i=1}^{input\\_dim}
+       \\left( \\frac{\\sin(\\frac{\\pi}{\\lambda_i} (x_i - y_i) )}{l_i} \\right)^2 \\right] }

    """

@ -177,7 +177,7 @@ def seriescoeff(m=6, lengthScale=1.0, magnSigma2=1.0, true_covariance=False):
    Calculate the coefficients q_j^2 for the covariance function
    approximation:

-        k(\tau) =  \\sum_{j=0}^{+\\infty} q_j^2 \\cos(j\\omega_0 \tau)
+        k(\\tau) =  \\sum_{j=0}^{+\\infty} q_j^2 \\cos(j\\omega_0 \\tau)

    Reference is:

--- a/GPy/kern/src/sde_static.py
+++ b/GPy/kern/src/sde_static.py
@ -12,63 +12,63 @@ import numpy as np

 class sde_White(White):
    """
-    
+
    Class provide extra functionality to transfer this covariance function into
    SDE forrm.
-    
+
    White kernel:

    .. math::

-       k(x,y) = \alpha*\\delta(x-y)
+       k(x,y) = \\alpha*\\delta(x-y)

    """
-    
+
    def sde_update_gradient_full(self, gradients):
        """
        Update gradient in the order in which parameters are represented in the
        kernel
        """
-    
+
        self.variance.gradient = gradients[0]
-        
-    def sde(self): 
-        """ 
-        Return the state space representation of the covariance. 
-        """ 
-        
-        variance = float(self.variance.values) 
-        
+
+    def sde(self):
+        """
+        Return the state space representation of the covariance.
+        """
+
+        variance = float(self.variance.values)
+
        F = np.array( ((-np.inf,),) )
        L = np.array( ((1.0,),)  )
        Qc = np.array( ((variance,),)  )
        H = np.array( ((1.0,),) )
-        
+
        Pinf   = np.array( ((variance,),)  )
-        P0 = Pinf.copy()     
-        
+        P0 = Pinf.copy()
+
        dF = np.zeros((1,1,1))
        dQc = np.zeros((1,1,1))
        dQc[:,:,0]    = np.array( ((1.0,),) )
-        
+
        dPinf = np.zeros((1,1,1))
        dPinf[:,:,0] = np.array( ((1.0,),) )
        dP0 = dPinf.copy()
-        
+
        return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)


 class sde_Bias(Bias):
    """
-    
+
    Class provide extra functionality to transfer this covariance function into
    SDE forrm.
-    
+
    Bias kernel:

    .. math::

-       k(x,y) = \alpha
+       k(x,y) = \\alpha

    """
    def sde_update_gradient_full(self, gradients):
@ -76,28 +76,28 @@ class sde_Bias(Bias):
        Update gradient in the order in which parameters are represented in the
        kernel
        """
-    
+
        self.variance.gradient = gradients[0]
-        
-    def sde(self): 
-        """ 
-        Return the state space representation of the covariance. 
-        """ 
-        variance = float(self.variance.values) 
-        
+
+    def sde(self):
+        """
+        Return the state space representation of the covariance.
+        """
+        variance = float(self.variance.values)
+
        F = np.array( ((0.0,),))
        L = np.array( ((1.0,),))
        Qc = np.zeros((1,1))
        H = np.array( ((1.0,),))
-        
+
        Pinf   = np.zeros((1,1))
-        P0 = np.array( ((variance,),) )      
-        
+        P0 = np.array( ((variance,),) )
+
        dF = np.zeros((1,1,1))
        dQc    = np.zeros((1,1,1))
-        
+
        dPinf = np.zeros((1,1,1))
        dP0 = np.zeros((1,1,1))
        dP0[:,:,0] = np.array( ((1.0,),) )
-        
+
        return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)
--- a/GPy/kern/src/sde_stationary.py
+++ b/GPy/kern/src/sde_stationary.py
@ -29,7 +29,7 @@ class sde_RBF(RBF):

    .. math::

-        k(r) = \\sigma^2 \\exp \\bigg(- \\frac{1}{2} r^2 \\bigg) \\ \\ \\ \\  \text{ where  } r = \\sqrt{\\sum_{i=1}^{input dim} \frac{(x_i-y_i)^2}{\\ell_i^2} }
+        k(r) = \\sigma^2 \\exp \\bigg(- \\frac{1}{2} r^2 \\bigg) \\ \\ \\ \\  \\text{ where  } r = \\sqrt{\\sum_{i=1}^{input dim} \\frac{(x_i-y_i)^2}{\\ell_i^2} }

    """

@ -102,7 +102,7 @@ class sde_RBF(RBF):
        eps = 1e-12
        if (float(Qc) > 1.0 / eps) or (float(Qc) < eps):
            warnings.warn(
-                """sde_RBF kernel: the noise variance Qc is either very large or very small. 
+                """sde_RBF kernel: the noise variance Qc is either very large or very small.
                                It influece conditioning of P_inf: {0:e}""".format(
                    float(Qc)
                )
@ -204,7 +204,7 @@ class sde_Exponential(Exponential):

    .. math::

-       k(r) = \\sigma^2 \\exp \\bigg(- \\frac{1}{2} r \\bigg) \\ \\ \\ \\  \text{ where  } r = \\sqrt{\\sum_{i=1}^{input dim} \frac{(x_i-y_i)^2}{\\ell_i^2} }
+       k(r) = \\sigma^2 \\exp \\bigg(- \\frac{1}{2} r \\bigg) \\ \\ \\ \\  \\text{ where  } r = \\sqrt{\\sum_{i=1}^{input dim} \\frac{(x_i-y_i)^2}{\\ell_i^2} }

    """

@ -259,7 +259,7 @@ class sde_RatQuad(RatQuad):

    .. math::

-       k(r) = \\sigma^2 \\bigg( 1 + \\frac{r^2}{2} \\bigg)^{- \alpha} \\ \\ \\ \\  \text{ where  } r = \\sqrt{\\sum_{i=1}^{input dim} \frac{(x_i-y_i)^2}{\\ell_i^2} }
+       k(r) = \\sigma^2 \\bigg( 1 + \\frac{r^2}{2} \\bigg)^{- \\alpha} \\ \\ \\ \\  \\text{ where  } r = \\sqrt{\\sum_{i=1}^{input dim} \\frac{(x_i-y_i)^2}{\\ell_i^2} }

    """

--- a/GPy/kern/src/standard_periodic.py
+++ b/GPy/kern/src/standard_periodic.py
@ -24,12 +24,12 @@ class StdPeriodic(Kern):

    .. math::

-       k(x,y) = \theta_1 \\exp \\left[  - \frac{1}{2} \\sum_{i=1}^{input\\_dim}
-       \\left( \frac{\\sin(\frac{\\pi}{T_i} (x_i - y_i) )}{l_i} \right)^2 \right] }
+       k(x,y) = \\theta_1 \\exp \\left[  - \\frac{1}{2} \\sum_{i=1}^{input\\_dim}
+       \\left( \\frac{\\sin(\\frac{\\pi}{T_i} (x_i - y_i) )}{l_i} \\right)^2 \\right] }

    :param input_dim: the number of input dimensions
    :type input_dim: int
-    :param variance: the variance :math:`\theta_1` in the formula above
+    :param variance: the variance :math:`\\theta_1` in the formula above
    :type variance: float
    :param period: the vector of periods :math:`\\T_i`. If None then 1.0 is assumed.
    :type period: array or list of the appropriate size (or float if there is only one period parameter)
@ -177,7 +177,7 @@ class StdPeriodic(Kern):
        Returns only diagonal elements.
        """
        return np.zeros(X.shape[0])
-    
+
    def dK2_dXdX2(self, X, X2, dimX, dimX2):
        """
        Compute the second derivative of K with respect to:
@ -578,9 +578,9 @@ class StdPeriodic(Kern):
            X2 = X
        dX = -np.pi*((dL_dK*K)[:,:,None]*np.sin(2*np.pi/self.period*(X[:,None,:] - X2[None,:,:]))/(2.*np.square(self.lengthscale)*self.period)).sum(1)
        return dX
-    
+
    def gradients_X_diag(self, dL_dKdiag, X):
        return np.zeros(X.shape)
-    
+
    def input_sensitivity(self, summarize=True):
        return self.variance*np.ones(self.input_dim)/self.lengthscale**2
--- a/GPy/kern/src/stationary.py
+++ b/GPy/kern/src/stationary.py
@ -259,7 +259,7 @@ class Stationary(Kern):
        the returned array is of shape [NxNxQxQ].

        ..math:
-            \frac{\\partial^2 K}{\\partial X2 ^2} = - \frac{\\partial^2 K}{\\partial X\\partial X2}
+            \\frac{\\partial^2 K}{\\partial X2 ^2} = - \\frac{\\partial^2 K}{\\partial X\\partial X2}

        ..returns:
            dL2_dXdX2:  [NxMxQxQ] in the cov=True case, or [NxMxQ] in the cov=False case,
@ -295,7 +295,7 @@ class Stationary(Kern):
        Given the derivative of the objective dL_dK, compute the second derivative of K wrt X:

        ..math:
-          \frac{\\partial^2 K}{\\partial X\\partial X}
+          \\frac{\\partial^2 K}{\\partial X\\partial X}

        ..returns:
            dL2_dXdX: [NxQxQ]
@ -423,7 +423,7 @@ class OU(Stationary):

    .. math::

-       k(r) = \\sigma^2 \\exp(- r) \\ \\ \\ \\  \\text{ where  } r = \\sqrt{\\sum_{i=1}^{\text{input_dim}} \\frac{(x_i-y_i)^2}{\\ell_i^2} }
+       k(r) = \\sigma^2 \\exp(- r) \\ \\ \\ \\  \\text{ where  } r = \\sqrt{\\sum_{i=1}^{\\text{input_dim}} \\frac{(x_i-y_i)^2}{\\ell_i^2} }

    """

--- a/GPy/kern/src/todo/eq_ode1.py
+++ b/GPy/kern/src/todo/eq_ode1.py
@ -14,23 +14,23 @@ class Eq_ode1(Kernpart):

    This outputs of this kernel have the form
    .. math::
-       \frac{\text{d}y_j}{\text{d}t} = \sum_{i=1}^R w_{j,i} f_i(t-\delta_j) +\sqrt{\kappa_j}g_j(t) - d_jy_j(t)
+       \\frac{\\text{d}y_j}{\\text{d}t} = \\sum_{i=1}^R w_{j,i} f_i(t-\\delta_j) +\\sqrt{\\kappa_j}g_j(t) - d_jy_j(t)

    where :math:`R` is the rank of the system, :math:`w_{j,i}` is the sensitivity of the :math:`j`th output to the :math:`i`th latent function, :math:`d_j` is the decay rate of the :math:`j`th output and :math:`f_i(t)` and :math:`g_i(t)` are independent latent Gaussian processes goverened by an exponentiated quadratic covariance.
-    
+
    :param output_dim: number of outputs driven by latent function.
    :type output_dim: int
-    :param W: sensitivities of each output to the latent driving function. 
+    :param W: sensitivities of each output to the latent driving function.
    :type W: ndarray (output_dim x rank).
    :param rank: If rank is greater than 1 then there are assumed to be a total of rank latent forces independently driving the system, each with identical covariance.
    :type rank: int
-    :param decay: decay rates for the first order system. 
+    :param decay: decay rates for the first order system.
    :type decay: array of length output_dim.
    :param delay: delay between latent force and output response.
    :type delay: array of length output_dim.
    :param kappa: diagonal term that allows each latent output to have an independent component to the response.
    :type kappa: array of length output_dim.
-    
+
    .. Note: see first order differential equation examples in GPy.examples.regression for some usage.
    """
    def __init__(self,output_dim, W=None, rank=1, kappa=None, lengthscale=1.0,  decay=None, delay=None):
@ -62,7 +62,7 @@ class Eq_ode1(Kernpart):
        self.is_stationary = False
        self.gaussian_initial = False
        self._set_params(self._get_params())
-        
+
    def _get_params(self):
        param_list = [self.W.flatten()]
        if self.kappa is not None:
@ -103,11 +103,11 @@ class Eq_ode1(Kernpart):
        param_names += ['decay_%i'%i for i in range(1,self.output_dim)]
        if self.delay is not None:
            param_names += ['delay_%i'%i for i in 1+range(1,self.output_dim)]
-        param_names+= ['lengthscale'] 
+        param_names+= ['lengthscale']
        return param_names

    def K(self,X,X2,target):
-        
+
        if X.shape[1] > 2:
            raise ValueError('Input matrix for ode1 covariance should have at most two columns, one containing times, the other output indices')

@ -123,13 +123,13 @@ class Eq_ode1(Kernpart):
    def Kdiag(self,index,target):
        #target += np.diag(self.B)[np.asarray(index,dtype=int).flatten()]
        pass
-    
+
    def _param_grad_helper(self,dL_dK,X,X2,target):
-        
+
        # First extract times and indices.
        self._extract_t_indices(X, X2, dL_dK=dL_dK)
        self._dK_ode_dtheta(target)
-        
+

    def _dK_ode_dtheta(self, target):
        """Do all the computations for the ode parts of the covariance function."""
@ -138,7 +138,7 @@ class Eq_ode1(Kernpart):
        index_ode = self._index[self._index>0]-1
        if self._t2 is None:
            if t_ode.size==0:
-                return        
+                return
            t2_ode = t_ode
            dL_dK_ode = dL_dK_ode[:, self._index>0]
            index2_ode = index_ode
@ -210,7 +210,7 @@ class Eq_ode1(Kernpart):
        self._index = self._index[self._order]
        self._t = self._t[self._order]
        self._rorder = self._order.argsort() # rorder is for reversing the order
-        
+
        if X2 is None:
            self._t2 = None
            self._index2 = None
@ -229,7 +229,7 @@ class Eq_ode1(Kernpart):
        if dL_dK is not None:
            self._dL_dK = dL_dK[self._order, :]
            self._dL_dK = self._dL_dK[:, self._order2]
-            
+
    def _K_computations(self, X, X2):
        """Perform main body of computations for the ode1 covariance function."""
        # First extract times and indices.
@ -253,8 +253,8 @@ class Eq_ode1(Kernpart):
                                                   np.hstack((self._K_ode_eq, self._K_ode))))
        self._K_dvar = self._K_dvar[self._rorder, :]
        self._K_dvar = self._K_dvar[:, self._rorder2]
-        
-        
+
+
        if X2 is None:
            # Matrix giving scales of each output
            self._scale = np.zeros((self._t.size, self._t.size))
@ -303,7 +303,7 @@ class Eq_ode1(Kernpart):
                self._K_eq = np.zeros((t_eq.size, t2_eq.size))
                return
            self._dist2 = np.square(t_eq[:, None] - t2_eq[None, :])
-        
+
        self._K_eq = np.exp(-self._dist2/(2*self.lengthscale*self.lengthscale))
        if self.is_normalized:
            self._K_eq/=(np.sqrt(2*np.pi)*self.lengthscale)
@ -361,7 +361,7 @@ class Eq_ode1(Kernpart):
            self._K_eq_ode = sK.T
        else:
            self._K_ode_eq = sK
-        
+
    def _K_compute_ode(self):
        # Compute covariances between outputs of the ODE models.

@ -370,7 +370,7 @@ class Eq_ode1(Kernpart):
        if self._t2 is None:
            if t_ode.size==0:
                self._K_ode = np.zeros((0, 0))
-                return        
+                return
            t2_ode = t_ode
            index2_ode = index_ode
        else:
@ -379,14 +379,14 @@ class Eq_ode1(Kernpart):
                self._K_ode = np.zeros((t_ode.size, t2_ode.size))
                return
            index2_ode = self._index2[self._index2>0]-1
-        
+
        # When index is identical
        h = self._compute_H(t_ode, index_ode, t2_ode, index2_ode, stationary=self.is_stationary)

        if self._t2 is None:
            self._K_ode = 0.5 * (h + h.T)
        else:
-            h2 = self._compute_H(t2_ode, index2_ode, t_ode, index_ode, stationary=self.is_stationary)                
+            h2 = self._compute_H(t2_ode, index2_ode, t_ode, index_ode, stationary=self.is_stationary)
            self._K_ode = 0.5 * (h + h2.T)

        if not self.is_normalized:
@ -410,28 +410,28 @@ class Eq_ode1(Kernpart):
            Decay = self.decay[index]
            if self.delay is not None:
                t = t - self.delay[index]
-            
+
            t_squared = t*t
            half_sigma_decay = 0.5*self.sigma*Decay
            [ln_part_1, sign1] = ln_diff_erfs(half_sigma_decay + t/self.sigma,
                                              half_sigma_decay)
-    
+
            [ln_part_2, sign2] = ln_diff_erfs(half_sigma_decay,
                                              half_sigma_decay - t/self.sigma)
-            
+
            h = (sign1*np.exp(half_sigma_decay*half_sigma_decay
                             + ln_part_1
-                             - log(Decay + D_j)) 
+                             - log(Decay + D_j))
                 - sign2*np.exp(half_sigma_decay*half_sigma_decay
                                - (Decay + D_j)*t
-                                + ln_part_2 
+                                + ln_part_2
                                - log(Decay + D_j)))
-    
+
            sigma2 = self.sigma*self.sigma

        if update_derivatives:
-        
-            dh_dD_i = ((0.5*Decay*sigma2*(Decay + D_j)-1)*h 
+
+            dh_dD_i = ((0.5*Decay*sigma2*(Decay + D_j)-1)*h
                       + t*sign2*np.exp(
                half_sigma_decay*half_sigma_decay-(Decay+D_j)*t + ln_part_2
                )
@ -439,11 +439,11 @@ class Eq_ode1(Kernpart):
                       (-1 + np.exp(-t_squared/sigma2-Decay*t)
                        + np.exp(-t_squared/sigma2-D_j*t)
                        - np.exp(-(Decay + D_j)*t)))
-        
+
            dh_dD_i = (dh_dD_i/(Decay+D_j)).real
-        
-        
-        
+
+
+
            dh_dD_j = (t*sign2*np.exp(
                half_sigma_decay*half_sigma_decay-(Decay + D_j)*t+ln_part_2
                )
@ -457,7 +457,7 @@ class Eq_ode1(Kernpart):
                          - (-t/sigma2-Decay/2)*np.exp(-t_squared/sigma2 - D_j*t) \
                          - Decay/2*np.exp(-(Decay+D_j)*t))"""
        pass
-    
+
    def _compute_H(self, t, index, t2, index2, update_derivatives=False, stationary=False):
        """Helper function for computing part of the ode1 covariance function.

@ -491,7 +491,7 @@ class Eq_ode1(Kernpart):
        inv_sigma_diff_t = 1./self.sigma*diff_t
        half_sigma_decay_i = 0.5*self.sigma*Decay[:, None]

-        ln_part_1, sign1 = ln_diff_erfs(half_sigma_decay_i + t2_mat/self.sigma, 
+        ln_part_1, sign1 = ln_diff_erfs(half_sigma_decay_i + t2_mat/self.sigma,
                                        half_sigma_decay_i - inv_sigma_diff_t,
                                        return_sign=True)
        ln_part_2, sign2 = ln_diff_erfs(half_sigma_decay_i,
@ -529,7 +529,7 @@ class Eq_ode1(Kernpart):
                )
                ))
            self._dh_ddecay = (dh_ddecay/(Decay[:, None]+Decay2[None, :])).real
-            
+
            # Update jth decay gradient
            dh_ddecay2 = (t2_mat*sign2
                         *np.exp(
@ -539,7 +539,7 @@ class Eq_ode1(Kernpart):
                )
                         -h)
            self._dh_ddecay2 = (dh_ddecay/(Decay[:, None] + Decay2[None, :])).real
-            
+
            # Update sigma gradient
            self._dh_dsigma = (half_sigma_decay_i*Decay[:, None]*h
                               + 2/(np.sqrt(np.pi)
@ -547,10 +547,10 @@ class Eq_ode1(Kernpart):
                               *((-diff_t/sigma2-Decay[:, None]/2)
                                 *np.exp(-diff_t*diff_t/sigma2)
                                 + (-t2_mat/sigma2+Decay[:, None]/2)
-                                 *np.exp(-t2_mat*t2_mat/sigma2-Decay[:, None]*t_mat) 
-                                 - (-t_mat/sigma2-Decay[:, None]/2) 
-                                 *np.exp(-t_mat*t_mat/sigma2-Decay2[None, :]*t2_mat) 
+                                 *np.exp(-t2_mat*t2_mat/sigma2-Decay[:, None]*t_mat)
+                                 - (-t_mat/sigma2-Decay[:, None]/2)
+                                 *np.exp(-t_mat*t_mat/sigma2-Decay2[None, :]*t2_mat)
                                 - Decay[:, None]/2
                                 *np.exp(-(Decay[:, None]*t_mat+Decay2[None, :]*t2_mat))))
-                
+
        return h
--- a/GPy/likelihoods/bernoulli.py
+++ b/GPy/likelihoods/bernoulli.py
@ -243,7 +243,7 @@ class Bernoulli(Likelihood):
        assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
        #d3logpdf_dlink3 = 2*(y/(inv_link_f**3) - (1-y)/((1-inv_link_f)**3))
        state = np.seterr(divide='ignore')
-        # TODO check y \in {0, 1} or {-1, 1}
+        # TODO check y \\in {0, 1} or {-1, 1}
        d3logpdf_dlink3 = np.where(y==1, 2./(inv_link_f**3), -2./((1.-inv_link_f)**3))
        np.seterr(**state)
        return d3logpdf_dlink3
--- a/GPy/mappings/kernel.py
+++ b/GPy/mappings/kernel.py
@ -12,13 +12,13 @@ class Kernel(Mapping):

    .. math::

-       f(\\mathbf{x}) = \\sum_i \alpha_i k(\\mathbf{z}_i, \\mathbf{x})
+       f(\\mathbf{x}) = \\sum_i \\alpha_i k(\\mathbf{z}_i, \\mathbf{x})

    or for multple outputs

    .. math::

-       f_i(\\mathbf{x}) = \\sum_j \alpha_{i,j} k(\\mathbf{z}_i, \\mathbf{x})
+       f_i(\\mathbf{x}) = \\sum_j \\alpha_{i,j} k(\\mathbf{z}_i, \\mathbf{x})


    :param input_dim: dimension of input.
--- a/GPy/models/state_space_main.py
+++ b/GPy/models/state_space_main.py
@ -4002,10 +4002,10 @@ class ContDescrStateSpace(DescreteStateSpace):
        """
        Linear Time-Invariant Stochastic Differential Equation (LTI SDE):

-            dx(t) = F x(t) dt + L d \beta  ,where
+            dx(t) = F x(t) dt + L d \\beta  ,where

                x(t): (vector) stochastic process
-                \beta: (vector) Brownian motion process
+                \\beta: (vector) Brownian motion process
                F, L: (time invariant) matrices of corresponding dimensions
                Qc: covariance of noise.

@ -4022,7 +4022,7 @@ class ContDescrStateSpace(DescreteStateSpace):
        F,L: LTI SDE matrices of corresponding dimensions

        Qc: matrix (n,n)
-            Covarince between different dimensions of noise \beta.
+            Covarince between different dimensions of noise \\beta.
            n is the dimensionality of the noise.

        dt: double or iterable
--- a/GPy/models/tp_regression.py
+++ b/GPy/models/tp_regression.py
@ -185,9 +185,9 @@ class TPRegression(Model):

        .. math::
            p(f*|X*, X, Y) = \\int^{\\inf}_{\\inf} p(f*|f,X*)p(f|X,Y) df
-                        = MVN\\left(\nu + N,f*| K_{x*x}(K_{xx})^{-1}Y,
-                        \frac{\nu + \beta - 2}{\nu + N - 2}K_{x*x*} - K_{xx*}(K_{xx})^{-1}K_{xx*}\right)
-            \nu := \texttt{Degrees of freedom}
+                        = MVN\\left(\\nu + N,f*| K_{x*x}(K_{xx})^{-1}Y,
+                        \\frac{\\nu + \\beta - 2}{\\nu + N - 2}K_{x*x*} - K_{xx*}(K_{xx})^{-1}K_{xx*}\\right)
+            \\nu := \\texttt{Degrees of freedom}
        """
        mu, var = self.posterior._raw_predict(kern=self.kern if kern is None else kern, Xnew=Xnew,
                                              pred_var=self._predictive_variable, full_cov=full_cov)
--- a/GPy/testing/test_model.py
+++ b/GPy/testing/test_model.py
@ -269,8 +269,8 @@ class TestMisc:
        from GPy.core.parameterization.variational import NormalPosterior

        X_pred = NormalPosterior(X_pred_mu, X_pred_var)
-        # mu = \int f(x)q(x|mu,S) dx = \int 2x.q(x|mu,S) dx = 2.mu
-        # S = \int (f(x) - m)^2q(x|mu,S) dx = \int f(x)^2 q(x) dx - mu**2 = 4(mu^2 + S) - (2.mu)^2 = 4S
+        # mu = \\int f(x)q(x|mu,S) dx = \\int 2x.q(x|mu,S) dx = 2.mu
+        # S = \\int (f(x) - m)^2q(x|mu,S) dx = \\int f(x)^2 q(x) dx - mu**2 = 4(mu^2 + S) - (2.mu)^2 = 4S
        Y_mu_true = 2 * X_pred_mu
        Y_var_true = 4 * X_pred_var
        Y_mu_pred, Y_var_pred = m.predict_noiseless(X_pred)
--- a/GPy/testing/test_rv_transformation.py
+++ b/GPy/testing/test_rv_transformation.py
@ -47,7 +47,7 @@ class TestRVTransformation:
        # ax.hist(phi_s, normed=True, bins=100, alpha=0.25, label='Histogram')
        # ax.plot(phi, kde(phi), '--', linewidth=2, label='Kernel Density Estimation')
        # ax.plot(phi, pdf_phi, ':', linewidth=2, label='Transformed PDF')
-        # ax.set_xlabel(r'transformed $\theta$', fontsize=16)
+        # ax.set_xlabel(r'transformed $\\theta$', fontsize=16)
        # ax.set_ylabel('PDF', fontsize=16)
        # plt.legend(loc='best')
        # plt.show(block=True)