diff --git a/GPy/core/gp.py b/GPy/core/gp.py index 01190101..95a4c220 100644 --- a/GPy/core/gp.py +++ b/GPy/core/gp.py @@ -194,7 +194,7 @@ class GP(Model): # Make sure to name this variable and the predict functions will "just work" # In maths the predictive variable is: # K_{xx} - K_{xp}W_{pp}^{-1}K_{px} - # W_{pp} := \texttt{Woodbury inv} + # W_{pp} := \\texttt{Woodbury inv} # p := _predictive_variable @property @@ -298,7 +298,7 @@ class GP(Model): .. math:: p(f*|X*, X, Y) = \\int^{\\inf}_{\\inf} p(f*|f,X*)p(f|X,Y) df = N(f*| K_{x*x}(K_{xx} + \\Sigma)^{-1}Y, K_{x*x*} - K_{xx*}(K_{xx} + \\Sigma)^{-1}K_{xx*} - \\Sigma := \texttt{Likelihood.variance / Approximate likelihood covariance} + \\Sigma := \\texttt{Likelihood.variance / Approximate likelihood covariance} """ mu, var = self.posterior._raw_predict(kern=self.kern if kern is None else kern, Xnew=Xnew, pred_var=self._predictive_variable, full_cov=full_cov) if self.mean_function is not None: diff --git a/GPy/inference/latent_function_inference/expectation_propagation.py b/GPy/inference/latent_function_inference/expectation_propagation.py index 2a558b5b..df27d7db 100644 --- a/GPy/inference/latent_function_inference/expectation_propagation.py +++ b/GPy/inference/latent_function_inference/expectation_propagation.py @@ -134,10 +134,10 @@ class posteriorParams(posteriorParamsBase): B = np.eye(num_data) + Sroot_tilde_K * tau_tilde_root[None,:] L = jitchol(B) V, _ = dtrtrs(L, Sroot_tilde_K, lower=1) - Sigma = K - np.dot(V.T,V) #K - KS^(1/2)BS^(1/2)K = (K^(-1) + \Sigma^(-1))^(-1) + Sigma = K - np.dot(V.T,V) #K - KS^(1/2)BS^(1/2)K = (K^(-1) + \\Sigma^(-1))^(-1) aux_alpha , _ = dpotrs(L, tau_tilde_root * (np.dot(K, ga_approx.v) + mean_prior), lower=1) - alpha = ga_approx.v - tau_tilde_root * aux_alpha #(K + Sigma^(\tilde))^(-1) (/mu^(/tilde) - /mu_p) + alpha = ga_approx.v - tau_tilde_root * aux_alpha #(K + Sigma^(\\tilde))^(-1) (/mu^(/tilde) - /mu_p) mu = np.dot(K, alpha) + mean_prior return posteriorParams(mu=mu, Sigma=Sigma, L=L) @@ -151,8 +151,8 @@ class posteriorParamsDTC(posteriorParamsBase): DSYR(LLT,Kmn[:,i].copy(),delta_tau) L = jitchol(LLT) V,info = dtrtrs(L,Kmn,lower=1) - self.Sigma_diag = np.maximum(np.sum(V*V,-2), np.finfo(float).eps) #diag(K_nm (L L^\top)^(-1)) K_mn - si = np.sum(V.T*V[:,i],-1) #(V V^\top)[:,i] + self.Sigma_diag = np.maximum(np.sum(V*V,-2), np.finfo(float).eps) #diag(K_nm (L L^\\top)^(-1)) K_mn + si = np.sum(V.T*V[:,i],-1) #(V V^\\top)[:,i] self.mu += (delta_v-delta_tau*self.mu[i])*si #mu = np.dot(Sigma, v_tilde) @@ -391,11 +391,11 @@ class EP(EPBase, ExactGaussianInference): aux_alpha , _ = dpotrs(post_params.L, tau_tilde_root * (np.dot(K, ga_approx.v) + mean_prior), lower=1) - alpha = (ga_approx.v - tau_tilde_root * aux_alpha)[:,None] #(K + Sigma^(\tilde))^(-1) (/mu^(/tilde) - /mu_p) + alpha = (ga_approx.v - tau_tilde_root * aux_alpha)[:,None] #(K + Sigma^(\\tilde))^(-1) (/mu^(/tilde) - /mu_p) LWi, _ = dtrtrs(post_params.L, np.diag(tau_tilde_root), lower=1) Wi = np.dot(LWi.T,LWi) - symmetrify(Wi) #(K + Sigma^(\tilde))^(-1) + symmetrify(Wi) #(K + Sigma^(\\tilde))^(-1) dL_dK = 0.5 * (tdot(alpha) - Wi) dL_dthetaL = likelihood.ep_gradients(Y, cav_params.tau, cav_params.v, np.diag(dL_dK), Y_metadata=Y_metadata, quad_mode='gh') @@ -530,7 +530,7 @@ class EPDTC(EPBase, VarDTC): #initial values - Gaussian factors #Initial values - Posterior distribution parameters: q(f|X,Y) = N(f|mu,Sigma) LLT0 = Kmm.copy() - Lm = jitchol(LLT0) #K_m = L_m L_m^\top + Lm = jitchol(LLT0) #K_m = L_m L_m^\\top Vm,info = dtrtrs(Lm, Kmn,lower=1) # Lmi = dtrtri(Lm) # Kmmi = np.dot(Lmi.T,Lmi) diff --git a/GPy/inference/latent_function_inference/pep.py b/GPy/inference/latent_function_inference/pep.py index 79706292..cd4ac0cc 100644 --- a/GPy/inference/latent_function_inference/pep.py +++ b/GPy/inference/latent_function_inference/pep.py @@ -8,14 +8,14 @@ log_2_pi = np.log(2*np.pi) class PEP(LatentFunctionInference): ''' Sparse Gaussian processes using Power-Expectation Propagation - for regression: alpha \approx 0 gives VarDTC and alpha = 1 gives FITC - - Reference: A Unifying Framework for Sparse Gaussian Process Approximation using + for regression: alpha \\approx 0 gives VarDTC and alpha = 1 gives FITC + + Reference: A Unifying Framework for Sparse Gaussian Process Approximation using Power Expectation Propagation, https://arxiv.org/abs/1605.07066 - + ''' const_jitter = 1e-6 - + def __init__(self, alpha): super(PEP, self).__init__() self.alpha = alpha @@ -69,7 +69,7 @@ class PEP(LatentFunctionInference): #compute dL_dR Uv = np.dot(U, v) dL_dR = 0.5*(np.sum(U*np.dot(U,P), 1) - (1.0+alpha_const_term)/beta_star + np.sum(np.square(Y), 1) - 2.*np.sum(Uv*Y, 1) \ - + np.sum(np.square(Uv), 1))*beta_star**2 + + np.sum(np.square(Uv), 1))*beta_star**2 # Compute dL_dKmm vvT_P = tdot(v.reshape(-1,1)) + P diff --git a/GPy/inference/latent_function_inference/posterior.py b/GPy/inference/latent_function_inference/posterior.py index e3cea7ea..e0de2aaa 100644 --- a/GPy/inference/latent_function_inference/posterior.py +++ b/GPy/inference/latent_function_inference/posterior.py @@ -82,7 +82,7 @@ class Posterior(object): Posterior mean $$ K_{xx}v - v := \texttt{Woodbury vector} + v := \\texttt{Woodbury vector} $$ """ if self._mean is None: @@ -95,7 +95,7 @@ class Posterior(object): Posterior covariance $$ K_{xx} - K_{xx}W_{xx}^{-1}K_{xx} - W_{xx} := \texttt{Woodbury inv} + W_{xx} := \\texttt{Woodbury inv} $$ """ if self._covariance is None: @@ -146,8 +146,8 @@ class Posterior(object): """ return $L_{W}$ where L is the lower triangular Cholesky decomposition of the Woodbury matrix $$ - L_{W}L_{W}^{\top} = W^{-1} - W^{-1} := \texttt{Woodbury inv} + L_{W}L_{W}^{\\top} = W^{-1} + W^{-1} := \\texttt{Woodbury inv} $$ """ if self._woodbury_chol is None: @@ -179,7 +179,7 @@ class Posterior(object): The inverse of the woodbury matrix, in the gaussian likelihood case it is defined as $$ (K_{xx} + \\Sigma_{xx})^{-1} - \\Sigma_{xx} := \texttt{Likelihood.variance / Approximate likelihood covariance} + \\Sigma_{xx} := \\texttt{Likelihood.variance / Approximate likelihood covariance} $$ """ if self._woodbury_inv is None: @@ -201,7 +201,7 @@ class Posterior(object): Woodbury vector in the gaussian likelihood case only is defined as $$ (K_{xx} + \\Sigma)^{-1}Y - \\Sigma := \texttt{Likelihood.variance / Approximate likelihood covariance} + \\Sigma := \\texttt{Likelihood.variance / Approximate likelihood covariance} $$ """ if self._woodbury_vector is None: diff --git a/GPy/kern/src/coregionalize.py b/GPy/kern/src/coregionalize.py index aaaeae72..a7c4f6f7 100644 --- a/GPy/kern/src/coregionalize.py +++ b/GPy/kern/src/coregionalize.py @@ -19,18 +19,18 @@ except ImportError: class Coregionalize(Kern): - r""" + """ Covariance function for intrinsic/linear coregionalization models This covariance has the form: .. math:: - \mathbf{B} = \mathbf{W}\mathbf{W}^\intercal + \mathrm{diag}(kappa) + \\mathbf{B} = \\mathbf{W}\\mathbf{W}^\\intercal + \\mathrm{diag}(kappa) An intrinsic/linear coregionalization covariance function of the form: .. math:: - k_2(x, y)=\mathbf{B} k(x, y) + k_2(x, y)=\\mathbf{B} k(x, y) it is obtained as the tensor product between a covariance function k(x, y) and B. diff --git a/GPy/kern/src/eq_ode1.py b/GPy/kern/src/eq_ode1.py index 69b5ec54..caedc7a3 100644 --- a/GPy/kern/src/eq_ode1.py +++ b/GPy/kern/src/eq_ode1.py @@ -15,7 +15,7 @@ class EQ_ODE1(Kern): This outputs of this kernel have the form .. math:: - \frac{\text{d}y_j}{\text{d}t} = \\sum_{i=1}^R w_{j,i} u_i(t-\\delta_j) - d_jy_j(t) + \\frac{\\text{d}y_j}{\\text{d}t} = \\sum_{i=1}^R w_{j,i} u_i(t-\\delta_j) - d_jy_j(t) where :math:`R` is the rank of the system, :math:`w_{j,i}` is the sensitivity of the :math:`j`th output to the :math:`i`th latent function, :math:`d_j` is the decay rate of the :math:`j`th output and :math:`u_i(t)` are independent latent Gaussian processes goverened by an exponentiated quadratic covariance. diff --git a/GPy/kern/src/eq_ode2.py b/GPy/kern/src/eq_ode2.py index e2f251d7..e809b151 100644 --- a/GPy/kern/src/eq_ode2.py +++ b/GPy/kern/src/eq_ode2.py @@ -15,7 +15,7 @@ class EQ_ODE2(Kern): This outputs of this kernel have the form .. math:: - \frac{\text{d}^2y_j(t)}{\text{d}^2t} + C_j\frac{\text{d}y_j(t)}{\text{d}t} + B_jy_j(t) = \\sum_{i=1}^R w_{j,i} u_i(t) + \\frac{\\text{d}^2y_j(t)}{\\text{d}^2t} + C_j\\frac{\\text{d}y_j(t)}{\\text{d}t} + B_jy_j(t) = \\sum_{i=1}^R w_{j,i} u_i(t) where :math:`R` is the rank of the system, :math:`w_{j,i}` is the sensitivity of the :math:`j`th output to the :math:`i`th latent function, :math:`d_j` is the decay rate of the :math:`j`th output and :math:`f_i(t)` and :math:`g_i(t)` are independent latent Gaussian processes goverened by an exponentiated quadratic covariance. diff --git a/GPy/kern/src/linear.py b/GPy/kern/src/linear.py index 6106a671..1caf93f6 100644 --- a/GPy/kern/src/linear.py +++ b/GPy/kern/src/linear.py @@ -121,7 +121,7 @@ class Linear(Kern): the returned array is of shape [NxNxQxQ]. ..math: - \frac{\\partial^2 K}{\\partial X2 ^2} = - \frac{\\partial^2 K}{\\partial X\\partial X2} + \\frac{\\partial^2 K}{\\partial X2 ^2} = - \\frac{\\partial^2 K}{\\partial X\\partial X2} ..returns: dL2_dXdX2: [NxMxQxQ] for X [NxQ] and X2[MxQ] (X2 is X if, X2 is None) diff --git a/GPy/kern/src/sde_matern.py b/GPy/kern/src/sde_matern.py index f896957e..de321f58 100644 --- a/GPy/kern/src/sde_matern.py +++ b/GPy/kern/src/sde_matern.py @@ -11,15 +11,15 @@ import numpy as np class sde_Matern32(Matern32): """ - + Class provide extra functionality to transfer this covariance function into SDE forrm. - + Matern 3/2 kernel: .. math:: - k(r) = \\sigma^2 (1 + \\sqrt{3} r) \\exp(- \\sqrt{3} r) \\ \\ \\ \\ \text{ where } r = \\sqrt{\\sum_{i=1}^{input dim} \frac{(x_i-y_i)^2}{\\ell_i^2} } + k(r) = \\sigma^2 (1 + \\sqrt{3} r) \\exp(- \\sqrt{3} r) \\ \\ \\ \\ \\text{ where } r = \\sqrt{\\sum_{i=1}^{input dim} \\frac{(x_i-y_i)^2}{\\ell_i^2} } """ def sde_update_gradient_full(self, gradients): @@ -27,59 +27,59 @@ class sde_Matern32(Matern32): Update gradient in the order in which parameters are represented in the kernel """ - + self.variance.gradient = gradients[0] self.lengthscale.gradient = gradients[1] - - def sde(self): - """ - Return the state space representation of the covariance. - """ - + + def sde(self): + """ + Return the state space representation of the covariance. + """ + variance = float(self.variance.values) lengthscale = float(self.lengthscale.values) - - foo = np.sqrt(3.)/lengthscale - F = np.array(((0, 1.0), (-foo**2, -2*foo))) + + foo = np.sqrt(3.)/lengthscale + F = np.array(((0, 1.0), (-foo**2, -2*foo))) L = np.array(( (0,), (1.0,) )) - Qc = np.array(((12.*np.sqrt(3) / lengthscale**3 * variance,),)) - H = np.array(((1.0, 0),)) + Qc = np.array(((12.*np.sqrt(3) / lengthscale**3 * variance,),)) + H = np.array(((1.0, 0),)) Pinf = np.array(((variance, 0.0), (0.0, 3.*variance/(lengthscale**2)))) P0 = Pinf.copy() - - # Allocate space for the derivatives + + # Allocate space for the derivatives dF = np.empty([F.shape[0],F.shape[1],2]) - dQc = np.empty([Qc.shape[0],Qc.shape[1],2]) - dPinf = np.empty([Pinf.shape[0],Pinf.shape[1],2]) - # The partial derivatives - dFvariance = np.zeros((2,2)) - dFlengthscale = np.array(((0,0), (6./lengthscale**3,2*np.sqrt(3)/lengthscale**2))) - dQcvariance = np.array((12.*np.sqrt(3)/lengthscale**3)) - dQclengthscale = np.array((-3*12*np.sqrt(3)/lengthscale**4*variance)) - dPinfvariance = np.array(((1,0),(0,3./lengthscale**2))) - dPinflengthscale = np.array(((0,0), (0,-6*variance/lengthscale**3))) - # Combine the derivatives - dF[:,:,0] = dFvariance - dF[:,:,1] = dFlengthscale - dQc[:,:,0] = dQcvariance - dQc[:,:,1] = dQclengthscale - dPinf[:,:,0] = dPinfvariance - dPinf[:,:,1] = dPinflengthscale + dQc = np.empty([Qc.shape[0],Qc.shape[1],2]) + dPinf = np.empty([Pinf.shape[0],Pinf.shape[1],2]) + # The partial derivatives + dFvariance = np.zeros((2,2)) + dFlengthscale = np.array(((0,0), (6./lengthscale**3,2*np.sqrt(3)/lengthscale**2))) + dQcvariance = np.array((12.*np.sqrt(3)/lengthscale**3)) + dQclengthscale = np.array((-3*12*np.sqrt(3)/lengthscale**4*variance)) + dPinfvariance = np.array(((1,0),(0,3./lengthscale**2))) + dPinflengthscale = np.array(((0,0), (0,-6*variance/lengthscale**3))) + # Combine the derivatives + dF[:,:,0] = dFvariance + dF[:,:,1] = dFlengthscale + dQc[:,:,0] = dQcvariance + dQc[:,:,1] = dQclengthscale + dPinf[:,:,0] = dPinfvariance + dPinf[:,:,1] = dPinflengthscale dP0 = dPinf.copy() - + return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0) class sde_Matern52(Matern52): """ - + Class provide extra functionality to transfer this covariance function into SDE forrm. - + Matern 5/2 kernel: .. math:: - k(r) = \\sigma^2 (1 + \\sqrt{5} r + \frac{5}{3}r^2) \\exp(- \\sqrt{5} r) \\ \\ \\ \\ \text{ where } r = \\sqrt{\\sum_{i=1}^{input dim} \frac{(x_i-y_i)^2}{\\ell_i^2} } + k(r) = \\sigma^2 (1 + \\sqrt{5} r + \\frac{5}{3}r^2) \\exp(- \\sqrt{5} r) \\ \\ \\ \\ \\text{ where } r = \\sqrt{\\sum_{i=1}^{input dim} \\frac{(x_i-y_i)^2}{\\ell_i^2} } """ def sde_update_gradient_full(self, gradients): @@ -87,51 +87,51 @@ class sde_Matern52(Matern52): Update gradient in the order in which parameters are represented in the kernel """ - + self.variance.gradient = gradients[0] self.lengthscale.gradient = gradients[1] - - def sde(self): - """ - Return the state space representation of the covariance. - """ - + + def sde(self): + """ + Return the state space representation of the covariance. + """ + variance = float(self.variance.values) lengthscale = float(self.lengthscale.values) lamda = np.sqrt(5.0)/lengthscale - kappa = 5.0/3.0*variance/lengthscale**2 - + kappa = 5.0/3.0*variance/lengthscale**2 + F = np.array(((0, 1,0), (0, 0, 1), (-lamda**3, -3.0*lamda**2, -3*lamda))) L = np.array(((0,),(0,),(1,))) Qc = np.array((((variance*400.0*np.sqrt(5.0)/3.0/lengthscale**5),),)) - H = np.array(((1,0,0),)) - + H = np.array(((1,0,0),)) + Pinf = np.array(((variance,0,-kappa), (0, kappa, 0), (-kappa, 0, 25.0*variance/lengthscale**4))) P0 = Pinf.copy() - # Allocate space for the derivatives - dF = np.empty((3,3,2)) - dQc = np.empty((1,1,2)) + # Allocate space for the derivatives + dF = np.empty((3,3,2)) + dQc = np.empty((1,1,2)) dPinf = np.empty((3,3,2)) - - # The partial derivatives + + # The partial derivatives dFvariance = np.zeros((3,3)) - dFlengthscale = np.array(((0,0,0),(0,0,0),(15.0*np.sqrt(5.0)/lengthscale**4, + dFlengthscale = np.array(((0,0,0),(0,0,0),(15.0*np.sqrt(5.0)/lengthscale**4, 30.0/lengthscale**3, 3*np.sqrt(5.0)/lengthscale**2))) dQcvariance = np.array((((400*np.sqrt(5)/3/lengthscale**5,),))) - dQclengthscale = np.array((((-variance*2000*np.sqrt(5)/3/lengthscale**6,),))) - + dQclengthscale = np.array((((-variance*2000*np.sqrt(5)/3/lengthscale**6,),))) + dPinf_variance = Pinf/variance kappa2 = -2.0*kappa/lengthscale - dPinf_lengthscale = np.array(((0,0,-kappa2),(0,kappa2,0),(-kappa2, - 0,-100*variance/lengthscale**5))) - # Combine the derivatives + dPinf_lengthscale = np.array(((0,0,-kappa2),(0,kappa2,0),(-kappa2, + 0,-100*variance/lengthscale**5))) + # Combine the derivatives dF[:,:,0] = dFvariance - dF[:,:,1] = dFlengthscale - dQc[:,:,0] = dQcvariance - dQc[:,:,1] = dQclengthscale + dF[:,:,1] = dFlengthscale + dQc[:,:,0] = dQcvariance + dQc[:,:,1] = dQclengthscale dPinf[:,:,0] = dPinf_variance dPinf[:,:,1] = dPinf_lengthscale dP0 = dPinf.copy() - - return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0) \ No newline at end of file + + return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0) \ No newline at end of file diff --git a/GPy/kern/src/sde_standard_periodic.py b/GPy/kern/src/sde_standard_periodic.py index cf6ab30d..add45936 100644 --- a/GPy/kern/src/sde_standard_periodic.py +++ b/GPy/kern/src/sde_standard_periodic.py @@ -24,8 +24,8 @@ class sde_StdPeriodic(StdPeriodic): .. math:: - k(x,y) = \theta_1 \\exp \\left[ - \frac{1}{2} {}\\sum_{i=1}^{input\\_dim} - \\left( \frac{\\sin(\frac{\\pi}{\\lambda_i} (x_i - y_i) )}{l_i} \right)^2 \right] } + k(x,y) = \\theta_1 \\exp \\left[ - \\frac{1}{2} {}\\sum_{i=1}^{input\\_dim} + \\left( \\frac{\\sin(\\frac{\\pi}{\\lambda_i} (x_i - y_i) )}{l_i} \\right)^2 \\right] } """ @@ -177,7 +177,7 @@ def seriescoeff(m=6, lengthScale=1.0, magnSigma2=1.0, true_covariance=False): Calculate the coefficients q_j^2 for the covariance function approximation: - k(\tau) = \\sum_{j=0}^{+\\infty} q_j^2 \\cos(j\\omega_0 \tau) + k(\\tau) = \\sum_{j=0}^{+\\infty} q_j^2 \\cos(j\\omega_0 \\tau) Reference is: diff --git a/GPy/kern/src/sde_static.py b/GPy/kern/src/sde_static.py index 93a13b3e..c61660a9 100644 --- a/GPy/kern/src/sde_static.py +++ b/GPy/kern/src/sde_static.py @@ -12,63 +12,63 @@ import numpy as np class sde_White(White): """ - + Class provide extra functionality to transfer this covariance function into SDE forrm. - + White kernel: .. math:: - k(x,y) = \alpha*\\delta(x-y) + k(x,y) = \\alpha*\\delta(x-y) """ - + def sde_update_gradient_full(self, gradients): """ Update gradient in the order in which parameters are represented in the kernel """ - + self.variance.gradient = gradients[0] - - def sde(self): - """ - Return the state space representation of the covariance. - """ - - variance = float(self.variance.values) - + + def sde(self): + """ + Return the state space representation of the covariance. + """ + + variance = float(self.variance.values) + F = np.array( ((-np.inf,),) ) L = np.array( ((1.0,),) ) Qc = np.array( ((variance,),) ) H = np.array( ((1.0,),) ) - + Pinf = np.array( ((variance,),) ) - P0 = Pinf.copy() - + P0 = Pinf.copy() + dF = np.zeros((1,1,1)) dQc = np.zeros((1,1,1)) dQc[:,:,0] = np.array( ((1.0,),) ) - + dPinf = np.zeros((1,1,1)) dPinf[:,:,0] = np.array( ((1.0,),) ) dP0 = dPinf.copy() - + return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0) class sde_Bias(Bias): """ - + Class provide extra functionality to transfer this covariance function into SDE forrm. - + Bias kernel: .. math:: - k(x,y) = \alpha + k(x,y) = \\alpha """ def sde_update_gradient_full(self, gradients): @@ -76,28 +76,28 @@ class sde_Bias(Bias): Update gradient in the order in which parameters are represented in the kernel """ - + self.variance.gradient = gradients[0] - - def sde(self): - """ - Return the state space representation of the covariance. - """ - variance = float(self.variance.values) - + + def sde(self): + """ + Return the state space representation of the covariance. + """ + variance = float(self.variance.values) + F = np.array( ((0.0,),)) L = np.array( ((1.0,),)) Qc = np.zeros((1,1)) H = np.array( ((1.0,),)) - + Pinf = np.zeros((1,1)) - P0 = np.array( ((variance,),) ) - + P0 = np.array( ((variance,),) ) + dF = np.zeros((1,1,1)) dQc = np.zeros((1,1,1)) - + dPinf = np.zeros((1,1,1)) dP0 = np.zeros((1,1,1)) dP0[:,:,0] = np.array( ((1.0,),) ) - + return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0) \ No newline at end of file diff --git a/GPy/kern/src/sde_stationary.py b/GPy/kern/src/sde_stationary.py index 58bb7424..491135c1 100644 --- a/GPy/kern/src/sde_stationary.py +++ b/GPy/kern/src/sde_stationary.py @@ -29,7 +29,7 @@ class sde_RBF(RBF): .. math:: - k(r) = \\sigma^2 \\exp \\bigg(- \\frac{1}{2} r^2 \\bigg) \\ \\ \\ \\ \text{ where } r = \\sqrt{\\sum_{i=1}^{input dim} \frac{(x_i-y_i)^2}{\\ell_i^2} } + k(r) = \\sigma^2 \\exp \\bigg(- \\frac{1}{2} r^2 \\bigg) \\ \\ \\ \\ \\text{ where } r = \\sqrt{\\sum_{i=1}^{input dim} \\frac{(x_i-y_i)^2}{\\ell_i^2} } """ @@ -102,7 +102,7 @@ class sde_RBF(RBF): eps = 1e-12 if (float(Qc) > 1.0 / eps) or (float(Qc) < eps): warnings.warn( - """sde_RBF kernel: the noise variance Qc is either very large or very small. + """sde_RBF kernel: the noise variance Qc is either very large or very small. It influece conditioning of P_inf: {0:e}""".format( float(Qc) ) @@ -204,7 +204,7 @@ class sde_Exponential(Exponential): .. math:: - k(r) = \\sigma^2 \\exp \\bigg(- \\frac{1}{2} r \\bigg) \\ \\ \\ \\ \text{ where } r = \\sqrt{\\sum_{i=1}^{input dim} \frac{(x_i-y_i)^2}{\\ell_i^2} } + k(r) = \\sigma^2 \\exp \\bigg(- \\frac{1}{2} r \\bigg) \\ \\ \\ \\ \\text{ where } r = \\sqrt{\\sum_{i=1}^{input dim} \\frac{(x_i-y_i)^2}{\\ell_i^2} } """ @@ -259,7 +259,7 @@ class sde_RatQuad(RatQuad): .. math:: - k(r) = \\sigma^2 \\bigg( 1 + \\frac{r^2}{2} \\bigg)^{- \alpha} \\ \\ \\ \\ \text{ where } r = \\sqrt{\\sum_{i=1}^{input dim} \frac{(x_i-y_i)^2}{\\ell_i^2} } + k(r) = \\sigma^2 \\bigg( 1 + \\frac{r^2}{2} \\bigg)^{- \\alpha} \\ \\ \\ \\ \\text{ where } r = \\sqrt{\\sum_{i=1}^{input dim} \\frac{(x_i-y_i)^2}{\\ell_i^2} } """ diff --git a/GPy/kern/src/standard_periodic.py b/GPy/kern/src/standard_periodic.py index 4baf86ee..6168956a 100644 --- a/GPy/kern/src/standard_periodic.py +++ b/GPy/kern/src/standard_periodic.py @@ -24,12 +24,12 @@ class StdPeriodic(Kern): .. math:: - k(x,y) = \theta_1 \\exp \\left[ - \frac{1}{2} \\sum_{i=1}^{input\\_dim} - \\left( \frac{\\sin(\frac{\\pi}{T_i} (x_i - y_i) )}{l_i} \right)^2 \right] } + k(x,y) = \\theta_1 \\exp \\left[ - \\frac{1}{2} \\sum_{i=1}^{input\\_dim} + \\left( \\frac{\\sin(\\frac{\\pi}{T_i} (x_i - y_i) )}{l_i} \\right)^2 \\right] } :param input_dim: the number of input dimensions :type input_dim: int - :param variance: the variance :math:`\theta_1` in the formula above + :param variance: the variance :math:`\\theta_1` in the formula above :type variance: float :param period: the vector of periods :math:`\\T_i`. If None then 1.0 is assumed. :type period: array or list of the appropriate size (or float if there is only one period parameter) @@ -177,7 +177,7 @@ class StdPeriodic(Kern): Returns only diagonal elements. """ return np.zeros(X.shape[0]) - + def dK2_dXdX2(self, X, X2, dimX, dimX2): """ Compute the second derivative of K with respect to: @@ -578,9 +578,9 @@ class StdPeriodic(Kern): X2 = X dX = -np.pi*((dL_dK*K)[:,:,None]*np.sin(2*np.pi/self.period*(X[:,None,:] - X2[None,:,:]))/(2.*np.square(self.lengthscale)*self.period)).sum(1) return dX - + def gradients_X_diag(self, dL_dKdiag, X): return np.zeros(X.shape) - + def input_sensitivity(self, summarize=True): return self.variance*np.ones(self.input_dim)/self.lengthscale**2 diff --git a/GPy/kern/src/stationary.py b/GPy/kern/src/stationary.py index bb8c6bcb..507684b8 100644 --- a/GPy/kern/src/stationary.py +++ b/GPy/kern/src/stationary.py @@ -259,7 +259,7 @@ class Stationary(Kern): the returned array is of shape [NxNxQxQ]. ..math: - \frac{\\partial^2 K}{\\partial X2 ^2} = - \frac{\\partial^2 K}{\\partial X\\partial X2} + \\frac{\\partial^2 K}{\\partial X2 ^2} = - \\frac{\\partial^2 K}{\\partial X\\partial X2} ..returns: dL2_dXdX2: [NxMxQxQ] in the cov=True case, or [NxMxQ] in the cov=False case, @@ -295,7 +295,7 @@ class Stationary(Kern): Given the derivative of the objective dL_dK, compute the second derivative of K wrt X: ..math: - \frac{\\partial^2 K}{\\partial X\\partial X} + \\frac{\\partial^2 K}{\\partial X\\partial X} ..returns: dL2_dXdX: [NxQxQ] @@ -423,7 +423,7 @@ class OU(Stationary): .. math:: - k(r) = \\sigma^2 \\exp(- r) \\ \\ \\ \\ \\text{ where } r = \\sqrt{\\sum_{i=1}^{\text{input_dim}} \\frac{(x_i-y_i)^2}{\\ell_i^2} } + k(r) = \\sigma^2 \\exp(- r) \\ \\ \\ \\ \\text{ where } r = \\sqrt{\\sum_{i=1}^{\\text{input_dim}} \\frac{(x_i-y_i)^2}{\\ell_i^2} } """ diff --git a/GPy/kern/src/todo/eq_ode1.py b/GPy/kern/src/todo/eq_ode1.py index 7104a8e9..71604a20 100644 --- a/GPy/kern/src/todo/eq_ode1.py +++ b/GPy/kern/src/todo/eq_ode1.py @@ -14,23 +14,23 @@ class Eq_ode1(Kernpart): This outputs of this kernel have the form .. math:: - \frac{\text{d}y_j}{\text{d}t} = \sum_{i=1}^R w_{j,i} f_i(t-\delta_j) +\sqrt{\kappa_j}g_j(t) - d_jy_j(t) + \\frac{\\text{d}y_j}{\\text{d}t} = \\sum_{i=1}^R w_{j,i} f_i(t-\\delta_j) +\\sqrt{\\kappa_j}g_j(t) - d_jy_j(t) where :math:`R` is the rank of the system, :math:`w_{j,i}` is the sensitivity of the :math:`j`th output to the :math:`i`th latent function, :math:`d_j` is the decay rate of the :math:`j`th output and :math:`f_i(t)` and :math:`g_i(t)` are independent latent Gaussian processes goverened by an exponentiated quadratic covariance. - + :param output_dim: number of outputs driven by latent function. :type output_dim: int - :param W: sensitivities of each output to the latent driving function. + :param W: sensitivities of each output to the latent driving function. :type W: ndarray (output_dim x rank). :param rank: If rank is greater than 1 then there are assumed to be a total of rank latent forces independently driving the system, each with identical covariance. :type rank: int - :param decay: decay rates for the first order system. + :param decay: decay rates for the first order system. :type decay: array of length output_dim. :param delay: delay between latent force and output response. :type delay: array of length output_dim. :param kappa: diagonal term that allows each latent output to have an independent component to the response. :type kappa: array of length output_dim. - + .. Note: see first order differential equation examples in GPy.examples.regression for some usage. """ def __init__(self,output_dim, W=None, rank=1, kappa=None, lengthscale=1.0, decay=None, delay=None): @@ -62,7 +62,7 @@ class Eq_ode1(Kernpart): self.is_stationary = False self.gaussian_initial = False self._set_params(self._get_params()) - + def _get_params(self): param_list = [self.W.flatten()] if self.kappa is not None: @@ -103,11 +103,11 @@ class Eq_ode1(Kernpart): param_names += ['decay_%i'%i for i in range(1,self.output_dim)] if self.delay is not None: param_names += ['delay_%i'%i for i in 1+range(1,self.output_dim)] - param_names+= ['lengthscale'] + param_names+= ['lengthscale'] return param_names def K(self,X,X2,target): - + if X.shape[1] > 2: raise ValueError('Input matrix for ode1 covariance should have at most two columns, one containing times, the other output indices') @@ -123,13 +123,13 @@ class Eq_ode1(Kernpart): def Kdiag(self,index,target): #target += np.diag(self.B)[np.asarray(index,dtype=int).flatten()] pass - + def _param_grad_helper(self,dL_dK,X,X2,target): - + # First extract times and indices. self._extract_t_indices(X, X2, dL_dK=dL_dK) self._dK_ode_dtheta(target) - + def _dK_ode_dtheta(self, target): """Do all the computations for the ode parts of the covariance function.""" @@ -138,7 +138,7 @@ class Eq_ode1(Kernpart): index_ode = self._index[self._index>0]-1 if self._t2 is None: if t_ode.size==0: - return + return t2_ode = t_ode dL_dK_ode = dL_dK_ode[:, self._index>0] index2_ode = index_ode @@ -210,7 +210,7 @@ class Eq_ode1(Kernpart): self._index = self._index[self._order] self._t = self._t[self._order] self._rorder = self._order.argsort() # rorder is for reversing the order - + if X2 is None: self._t2 = None self._index2 = None @@ -229,7 +229,7 @@ class Eq_ode1(Kernpart): if dL_dK is not None: self._dL_dK = dL_dK[self._order, :] self._dL_dK = self._dL_dK[:, self._order2] - + def _K_computations(self, X, X2): """Perform main body of computations for the ode1 covariance function.""" # First extract times and indices. @@ -253,8 +253,8 @@ class Eq_ode1(Kernpart): np.hstack((self._K_ode_eq, self._K_ode)))) self._K_dvar = self._K_dvar[self._rorder, :] self._K_dvar = self._K_dvar[:, self._rorder2] - - + + if X2 is None: # Matrix giving scales of each output self._scale = np.zeros((self._t.size, self._t.size)) @@ -303,7 +303,7 @@ class Eq_ode1(Kernpart): self._K_eq = np.zeros((t_eq.size, t2_eq.size)) return self._dist2 = np.square(t_eq[:, None] - t2_eq[None, :]) - + self._K_eq = np.exp(-self._dist2/(2*self.lengthscale*self.lengthscale)) if self.is_normalized: self._K_eq/=(np.sqrt(2*np.pi)*self.lengthscale) @@ -361,7 +361,7 @@ class Eq_ode1(Kernpart): self._K_eq_ode = sK.T else: self._K_ode_eq = sK - + def _K_compute_ode(self): # Compute covariances between outputs of the ODE models. @@ -370,7 +370,7 @@ class Eq_ode1(Kernpart): if self._t2 is None: if t_ode.size==0: self._K_ode = np.zeros((0, 0)) - return + return t2_ode = t_ode index2_ode = index_ode else: @@ -379,14 +379,14 @@ class Eq_ode1(Kernpart): self._K_ode = np.zeros((t_ode.size, t2_ode.size)) return index2_ode = self._index2[self._index2>0]-1 - + # When index is identical h = self._compute_H(t_ode, index_ode, t2_ode, index2_ode, stationary=self.is_stationary) if self._t2 is None: self._K_ode = 0.5 * (h + h.T) else: - h2 = self._compute_H(t2_ode, index2_ode, t_ode, index_ode, stationary=self.is_stationary) + h2 = self._compute_H(t2_ode, index2_ode, t_ode, index_ode, stationary=self.is_stationary) self._K_ode = 0.5 * (h + h2.T) if not self.is_normalized: @@ -410,28 +410,28 @@ class Eq_ode1(Kernpart): Decay = self.decay[index] if self.delay is not None: t = t - self.delay[index] - + t_squared = t*t half_sigma_decay = 0.5*self.sigma*Decay [ln_part_1, sign1] = ln_diff_erfs(half_sigma_decay + t/self.sigma, half_sigma_decay) - + [ln_part_2, sign2] = ln_diff_erfs(half_sigma_decay, half_sigma_decay - t/self.sigma) - + h = (sign1*np.exp(half_sigma_decay*half_sigma_decay + ln_part_1 - - log(Decay + D_j)) + - log(Decay + D_j)) - sign2*np.exp(half_sigma_decay*half_sigma_decay - (Decay + D_j)*t - + ln_part_2 + + ln_part_2 - log(Decay + D_j))) - + sigma2 = self.sigma*self.sigma if update_derivatives: - - dh_dD_i = ((0.5*Decay*sigma2*(Decay + D_j)-1)*h + + dh_dD_i = ((0.5*Decay*sigma2*(Decay + D_j)-1)*h + t*sign2*np.exp( half_sigma_decay*half_sigma_decay-(Decay+D_j)*t + ln_part_2 ) @@ -439,11 +439,11 @@ class Eq_ode1(Kernpart): (-1 + np.exp(-t_squared/sigma2-Decay*t) + np.exp(-t_squared/sigma2-D_j*t) - np.exp(-(Decay + D_j)*t))) - + dh_dD_i = (dh_dD_i/(Decay+D_j)).real - - - + + + dh_dD_j = (t*sign2*np.exp( half_sigma_decay*half_sigma_decay-(Decay + D_j)*t+ln_part_2 ) @@ -457,7 +457,7 @@ class Eq_ode1(Kernpart): - (-t/sigma2-Decay/2)*np.exp(-t_squared/sigma2 - D_j*t) \ - Decay/2*np.exp(-(Decay+D_j)*t))""" pass - + def _compute_H(self, t, index, t2, index2, update_derivatives=False, stationary=False): """Helper function for computing part of the ode1 covariance function. @@ -491,7 +491,7 @@ class Eq_ode1(Kernpart): inv_sigma_diff_t = 1./self.sigma*diff_t half_sigma_decay_i = 0.5*self.sigma*Decay[:, None] - ln_part_1, sign1 = ln_diff_erfs(half_sigma_decay_i + t2_mat/self.sigma, + ln_part_1, sign1 = ln_diff_erfs(half_sigma_decay_i + t2_mat/self.sigma, half_sigma_decay_i - inv_sigma_diff_t, return_sign=True) ln_part_2, sign2 = ln_diff_erfs(half_sigma_decay_i, @@ -529,7 +529,7 @@ class Eq_ode1(Kernpart): ) )) self._dh_ddecay = (dh_ddecay/(Decay[:, None]+Decay2[None, :])).real - + # Update jth decay gradient dh_ddecay2 = (t2_mat*sign2 *np.exp( @@ -539,7 +539,7 @@ class Eq_ode1(Kernpart): ) -h) self._dh_ddecay2 = (dh_ddecay/(Decay[:, None] + Decay2[None, :])).real - + # Update sigma gradient self._dh_dsigma = (half_sigma_decay_i*Decay[:, None]*h + 2/(np.sqrt(np.pi) @@ -547,10 +547,10 @@ class Eq_ode1(Kernpart): *((-diff_t/sigma2-Decay[:, None]/2) *np.exp(-diff_t*diff_t/sigma2) + (-t2_mat/sigma2+Decay[:, None]/2) - *np.exp(-t2_mat*t2_mat/sigma2-Decay[:, None]*t_mat) - - (-t_mat/sigma2-Decay[:, None]/2) - *np.exp(-t_mat*t_mat/sigma2-Decay2[None, :]*t2_mat) + *np.exp(-t2_mat*t2_mat/sigma2-Decay[:, None]*t_mat) + - (-t_mat/sigma2-Decay[:, None]/2) + *np.exp(-t_mat*t_mat/sigma2-Decay2[None, :]*t2_mat) - Decay[:, None]/2 *np.exp(-(Decay[:, None]*t_mat+Decay2[None, :]*t2_mat)))) - + return h diff --git a/GPy/likelihoods/bernoulli.py b/GPy/likelihoods/bernoulli.py index 2e745b9b..b02b6994 100644 --- a/GPy/likelihoods/bernoulli.py +++ b/GPy/likelihoods/bernoulli.py @@ -243,7 +243,7 @@ class Bernoulli(Likelihood): assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape #d3logpdf_dlink3 = 2*(y/(inv_link_f**3) - (1-y)/((1-inv_link_f)**3)) state = np.seterr(divide='ignore') - # TODO check y \in {0, 1} or {-1, 1} + # TODO check y \\in {0, 1} or {-1, 1} d3logpdf_dlink3 = np.where(y==1, 2./(inv_link_f**3), -2./((1.-inv_link_f)**3)) np.seterr(**state) return d3logpdf_dlink3 diff --git a/GPy/mappings/kernel.py b/GPy/mappings/kernel.py index bde2c6a2..dea86f5a 100644 --- a/GPy/mappings/kernel.py +++ b/GPy/mappings/kernel.py @@ -12,13 +12,13 @@ class Kernel(Mapping): .. math:: - f(\\mathbf{x}) = \\sum_i \alpha_i k(\\mathbf{z}_i, \\mathbf{x}) + f(\\mathbf{x}) = \\sum_i \\alpha_i k(\\mathbf{z}_i, \\mathbf{x}) or for multple outputs .. math:: - f_i(\\mathbf{x}) = \\sum_j \alpha_{i,j} k(\\mathbf{z}_i, \\mathbf{x}) + f_i(\\mathbf{x}) = \\sum_j \\alpha_{i,j} k(\\mathbf{z}_i, \\mathbf{x}) :param input_dim: dimension of input. diff --git a/GPy/models/state_space_main.py b/GPy/models/state_space_main.py index 0280dafc..630c31b6 100644 --- a/GPy/models/state_space_main.py +++ b/GPy/models/state_space_main.py @@ -4002,10 +4002,10 @@ class ContDescrStateSpace(DescreteStateSpace): """ Linear Time-Invariant Stochastic Differential Equation (LTI SDE): - dx(t) = F x(t) dt + L d \beta ,where + dx(t) = F x(t) dt + L d \\beta ,where x(t): (vector) stochastic process - \beta: (vector) Brownian motion process + \\beta: (vector) Brownian motion process F, L: (time invariant) matrices of corresponding dimensions Qc: covariance of noise. @@ -4022,7 +4022,7 @@ class ContDescrStateSpace(DescreteStateSpace): F,L: LTI SDE matrices of corresponding dimensions Qc: matrix (n,n) - Covarince between different dimensions of noise \beta. + Covarince between different dimensions of noise \\beta. n is the dimensionality of the noise. dt: double or iterable diff --git a/GPy/models/tp_regression.py b/GPy/models/tp_regression.py index 048f6316..46e223aa 100644 --- a/GPy/models/tp_regression.py +++ b/GPy/models/tp_regression.py @@ -185,9 +185,9 @@ class TPRegression(Model): .. math:: p(f*|X*, X, Y) = \\int^{\\inf}_{\\inf} p(f*|f,X*)p(f|X,Y) df - = MVN\\left(\nu + N,f*| K_{x*x}(K_{xx})^{-1}Y, - \frac{\nu + \beta - 2}{\nu + N - 2}K_{x*x*} - K_{xx*}(K_{xx})^{-1}K_{xx*}\right) - \nu := \texttt{Degrees of freedom} + = MVN\\left(\\nu + N,f*| K_{x*x}(K_{xx})^{-1}Y, + \\frac{\\nu + \\beta - 2}{\\nu + N - 2}K_{x*x*} - K_{xx*}(K_{xx})^{-1}K_{xx*}\\right) + \\nu := \\texttt{Degrees of freedom} """ mu, var = self.posterior._raw_predict(kern=self.kern if kern is None else kern, Xnew=Xnew, pred_var=self._predictive_variable, full_cov=full_cov) diff --git a/GPy/testing/test_model.py b/GPy/testing/test_model.py index aba6b2e8..72695a9f 100644 --- a/GPy/testing/test_model.py +++ b/GPy/testing/test_model.py @@ -269,8 +269,8 @@ class TestMisc: from GPy.core.parameterization.variational import NormalPosterior X_pred = NormalPosterior(X_pred_mu, X_pred_var) - # mu = \int f(x)q(x|mu,S) dx = \int 2x.q(x|mu,S) dx = 2.mu - # S = \int (f(x) - m)^2q(x|mu,S) dx = \int f(x)^2 q(x) dx - mu**2 = 4(mu^2 + S) - (2.mu)^2 = 4S + # mu = \\int f(x)q(x|mu,S) dx = \\int 2x.q(x|mu,S) dx = 2.mu + # S = \\int (f(x) - m)^2q(x|mu,S) dx = \\int f(x)^2 q(x) dx - mu**2 = 4(mu^2 + S) - (2.mu)^2 = 4S Y_mu_true = 2 * X_pred_mu Y_var_true = 4 * X_pred_var Y_mu_pred, Y_var_pred = m.predict_noiseless(X_pred) diff --git a/GPy/testing/test_rv_transformation.py b/GPy/testing/test_rv_transformation.py index 403d4c17..a6e4a666 100644 --- a/GPy/testing/test_rv_transformation.py +++ b/GPy/testing/test_rv_transformation.py @@ -47,7 +47,7 @@ class TestRVTransformation: # ax.hist(phi_s, normed=True, bins=100, alpha=0.25, label='Histogram') # ax.plot(phi, kde(phi), '--', linewidth=2, label='Kernel Density Estimation') # ax.plot(phi, pdf_phi, ':', linewidth=2, label='Transformed PDF') - # ax.set_xlabel(r'transformed $\theta$', fontsize=16) + # ax.set_xlabel(r'transformed $\\theta$', fontsize=16) # ax.set_ylabel('PDF', fontsize=16) # plt.legend(loc='best') # plt.show(block=True)