[doc] some changes to the doc, using mathjax some additions in math

2026-06-05 14:55:15 +02:00 · 2015-09-08 14:01:54 +01:00 · 2015-09-08 14:01:54 +01:00 · 6996912184
commit 6996912184
parent bcc1e7c8d4
11 changed files with 244 additions and 121 deletions
--- a/GPy/core/parameterization/parameterized.py
+++ b/GPy/core/parameterization/parameterized.py
@ -74,7 +74,7 @@ class Parameterized(Parameterizable):
    # Metaclass for parameters changed after init.
    # This makes sure, that parameters changed will always be called after __init__
    # **Never** call parameters_changed() yourself
-    #This is ignored in Python 3 -- you need to put the meta class in the function definition. 
+    #This is ignored in Python 3 -- you need to put the meta class in the function definition.
    #__metaclass__ = ParametersChangedMeta
    #The six module is used to support both Python 2 and 3 simultaneously
    #===========================================================================
@ -316,7 +316,7 @@ class Parameterized(Parameterizable):
                    param[:] = val; return
            except AttributeError:
                pass
-        object.__setattr__(self, name, val);
+        return object.__setattr__(self, name, val);

    #===========================================================================
    # Pickling
--- a/GPy/kern/_src/kern.py
+++ b/GPy/kern/_src/kern.py
@ -70,6 +70,9 @@ class Kern(Parameterized):
        """
        Compute the kernel function.

+        .. math::
+            K_{ij} = k(X_i, X_j)
+
        :param X: the first set of inputs to the kernel
        :param X2: (optional) the second set of arguments to the kernel. If X2
                   is None, this is passed throgh to the 'part' object, which
@ -77,24 +80,64 @@ class Kern(Parameterized):
        """
        raise NotImplementedError
    def Kdiag(self, X):
+        """
+        The diagonal of the kernel matrix K
+
+        .. math::
+            Kdiag_{i} = k(X_i, X_i)
+        """
        raise NotImplementedError
    def psi0(self, Z, variational_posterior):
+        """
+        .. math::
+            \psi_0 = \sum_{i=0}^{n}E_{q(X)}[k(X_i, X_i)]
+        """
        return self.psicomp.psicomputations(self, Z, variational_posterior)[0]
    def psi1(self, Z, variational_posterior):
+        """
+        .. math::
+            \psi_1^{n,m} = E_{q(X)}[k(X_n, Z_m)]
+        """
        return self.psicomp.psicomputations(self, Z, variational_posterior)[1]
    def psi2(self, Z, variational_posterior):
+        """
+        .. math::
+            \psi_2^{m,m'} = \sum_{i=0}^{n}E_{q(X)}[ k(Z_m, X_i) k(X_i, Z_{m'})]
+        """
        return self.psicomp.psicomputations(self, Z, variational_posterior, return_psi2_n=False)[2]
    def psi2n(self, Z, variational_posterior):
+        """
+        .. math::
+            \psi_2^{n,m,m'} = E_{q(X)}[ k(Z_m, X_n) k(X_n, Z_{m'})]
+
+        Thus, we do not sum out n, compared to psi2
+        """
        return self.psicomp.psicomputations(self, Z, variational_posterior, return_psi2_n=True)[2]
    def gradients_X(self, dL_dK, X, X2):
+        """
+        .. math::
+
+            \\frac{\partial L}{\partial X} = \\frac{\partial L}{\partial K}\\frac{\partial K}{\partial X}
+        """
        raise NotImplementedError
    def gradients_X_X2(self, dL_dK, X, X2):
        return self.gradients_X(dL_dK, X, X2), self.gradients_X(dL_dK.T, X2, X)
    def gradients_XX(self, dL_dK, X, X2):
+        """
+        .. math::
+
+            \\frac{\partial^2 L}{\partial X\partial X_2} = \\frac{\partial L}{\partial K}\\frac{\partial^2 K}{\partial X\partial X_2}
+        """
        raise(NotImplementedError, "This is the second derivative of K wrt X and X2, and not implemented for this kernel")
    def gradients_XX_diag(self, dL_dKdiag, X):
+        """
+        The diagonal of the second derivative w.r.t. X and X2
+        """
        raise(NotImplementedError, "This is the diagonal of the second derivative of K wrt X and X2, and not implemented for this kernel")
    def gradients_X_diag(self, dL_dKdiag, X):
+        """
+        The diagonal of the derivative w.r.t. X
+        """
        raise NotImplementedError

    def update_gradients_diag(self, dL_dKdiag, X):
@ -110,11 +153,17 @@ class Kern(Parameterized):
        Set the gradients of all parameters when doing inference with
        uncertain inputs, using expectations of the kernel.

-        The esential maths is
+        The essential maths is

-        dL_d{theta_i} = dL_dpsi0 * dpsi0_d{theta_i} +
-                        dL_dpsi1 * dpsi1_d{theta_i} +
-                        dL_dpsi2 * dpsi2_d{theta_i}
+        .. math::
+
+            \\frac{\partial L}{\partial \\theta_i} & = \\frac{\partial L}{\partial \psi_0}\\frac{\partial \psi_0}{\partial \\theta_i}\\\\
+                & \quad + \\frac{\partial L}{\partial \psi_1}\\frac{\partial \psi_1}{\partial \\theta_i}\\\\
+                & \quad + \\frac{\partial L}{\partial \psi_2}\\frac{\partial \psi_2}{\partial \\theta_i}
+
+        Thus, we push the different derivatives through the gradients of the psi
+        statistics. Be sure to set the gradients for all kernel
+        parameters here.
        """
        dtheta = self.psicomp.psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior)[0]
        self.gradient[:] = dtheta
--- a/GPy/kern/_src/kernel_slice_operations.py
+++ b/GPy/kern/_src/kernel_slice_operations.py
@ -1,7 +1,11 @@
 '''
 Created on 11 Mar 2014

-@author: maxz
+@author: @mzwiessele
+
+This module provides a meta class for the kernels. The meta class is for
+slicing the inputs (X, X2) for the kernels, before K (or any other method involving X)
+gets called. The `active_dims` of a kernel decide which dimensions the kernel works on.
 '''
 from ...core.parameterization.parameterized import ParametersChangedMeta
 import numpy as np
--- a/GPy/kern/_src/linear.py
+++ b/GPy/kern/_src/linear.py
@ -17,7 +17,7 @@ class Linear(Kern):

    .. math::

-       k(x,y) = \sum_{i=1}^input_dim \sigma^2_i x_iy_i
+       k(x,y) = \sum_{i=1}^{\\text{input_dim}} \sigma^2_i x_iy_i

    :param input_dim: the number of input dimensions
    :type input_dim: int
--- a/GPy/kern/_src/stationary.py
+++ b/GPy/kern/_src/stationary.py
@ -25,13 +25,16 @@ class Stationary(Kern):

    Stationary covariance fucntion depend only on r, where r is defined as

-      r = \sqrt{ \sum_{q=1}^Q (x_q - x'_q)^2 }
+    .. math::
+        r(x, x') = \\sqrt{ \\sum_{q=1}^Q (x_q - x'_q)^2 }

    The covariance function k(x, x' can then be written k(r).

    In this implementation, r is scaled by the lengthscales parameter(s):

-      r = \sqrt{ \sum_{q=1}^Q \frac{(x_q - x'_q)^2}{\ell_q^2} }.
+    .. math::
+
+        r(x, x') = \\sqrt{ \\sum_{q=1}^Q \\frac{(x_q - x'_q)^2}{\ell_q^2} }.

    By default, there's only one lengthscale: seaprate lengthscales for each
    dimension can be enables by setting ARD=True.
@ -39,11 +42,12 @@ class Stationary(Kern):
    To implement a stationary covariance function using this class, one need
    only define the covariance function k(r), and it derivative.

-      ...
-      def K_of_r(self, r):
-          return foo
-      def dK_dr(self, r):
-          return bar
+    .. code-block:: python
+
+        def K_of_r(self, r):
+            return foo
+        def dK_dr(self, r):
+            return bar

    The lengthscale(s) and variance parameters are added to the structure automatically.

@ -128,7 +132,8 @@ class Stationary(Kern):
        """
        Efficiently compute the scaled distance, r.

-        r = \sqrt( \sum_{q=1}^Q (x_q - x'q)^2/l_q^2 )
+        .. math::
+            r = \sqrt{ \sum_{q=1}^Q (x_q - x'_q)^2/\ell_q^2 }

        Note that if thre is only one lengthscale, l comes outside the sum. In
        this case we compute the unscaled distance first (in a separate
@ -321,7 +326,7 @@ class OU(Stationary):

    .. math::

-       k(r) = \\sigma^2 \exp(- r) \\ \\ \\ \\  \\text{ where  } r = \sqrt{\sum_{i=1}^input_dim \\frac{(x_i-y_i)^2}{\ell_i^2} }
+       k(r) = \\sigma^2 \exp(- r) \\ \\ \\ \\  \\text{ where  } r = \sqrt{\sum_{i=1}^{\\text{input_dim}} \\frac{(x_i-y_i)^2}{\ell_i^2} }

    """

@ -341,7 +346,7 @@ class Matern32(Stationary):

    .. math::

-       k(r) = \\sigma^2 (1 + \\sqrt{3} r) \exp(- \sqrt{3} r) \\ \\ \\ \\  \\text{ where  } r = \sqrt{\sum_{i=1}^input_dim \\frac{(x_i-y_i)^2}{\ell_i^2} }
+       k(r) = \\sigma^2 (1 + \\sqrt{3} r) \exp(- \sqrt{3} r) \\ \\ \\ \\  \\text{ where  } r = \sqrt{\sum_{i=1}^{\\text{input_dim}} \\frac{(x_i-y_i)^2}{\ell_i^2} }

    """

@ -388,7 +393,7 @@ class Matern52(Stationary):
    .. math::

       k(r) = \sigma^2 (1 + \sqrt{5} r + \\frac53 r^2) \exp(- \sqrt{5} r)
-       """
+    """
    def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='Mat52'):
        super(Matern52, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name)

--- a/GPy/kern/_src/trunclinear.py
+++ b/GPy/kern/_src/trunclinear.py
@ -15,7 +15,7 @@ class TruncLinear(Kern):

    .. math::

-       k(x,y) = \sum_{i=1}^input_dim \sigma^2_i \max(0, x_iy_i - \simga_q)
+       k(x,y) = \sum_{i=1}^{\\text{input_dim}} \sigma^2_i \max(0, x_iy_i - \sigma_q)

    :param input_dim: the number of input dimensions
    :type input_dim: int
@ -54,7 +54,7 @@ class TruncLinear(Kern):
        self.delta = Param('delta', delta)
        self.add_parameter(self.variances)
        self.add_parameter(self.delta)
-            
+
    @Cache_this(limit=2)
    def K(self, X, X2=None):
        XX = self.variances*self._product(X, X2)
@ -114,7 +114,7 @@ class TruncLinear_inf(Kern):

    .. math::

-       k(x,y) = \sum_{i=1}^input_dim \sigma^2_i \max(0, x_iy_i - \simga_q)
+       k(x,y) = \sum_{i=1}^{\\text{input_dim}} \sigma^2_i \max(0, x_iy_i - \sigma_q)

    :param input_dim: the number of input dimensions
    :type input_dim: int
@ -148,8 +148,8 @@ class TruncLinear_inf(Kern):

        self.variances = Param('variances', variances, Logexp())
        self.add_parameter(self.variances)
-        
-    
+
+
 #     @Cache_this(limit=2)
    def K(self, X, X2=None):
        tmp = self._product(X, X2)
--- a/GPy/testing/gp_tests.py
+++ b/GPy/testing/gp_tests.py
@ -88,7 +88,7 @@ class Test(unittest.TestCase):
        k.randomize()
        p = Parabola(.3)
        p.randomize()
-        Y = p.f(X) + np.random.multivariate_normal(np.zeros(X.shape[0]), k.K(X))[:,None] + np.random.normal(0, .1, (X.shape[0], 1))
+        Y = p.f(X) + np.random.multivariate_normal(np.zeros(X.shape[0]), k.K(X)+np.eye(X.shape[0])*1e-8)[:,None] + np.random.normal(0, .1, (X.shape[0], 1))
        m = GPy.models.GPRegression(X, Y, mean_function=p)
        m.randomize()
        assert(m.checkgrad())
--- a/GPy/testing/model_tests.py
+++ b/GPy/testing/model_tests.py
@ -62,6 +62,7 @@ class MiscTests(unittest.TestCase):
    def check_jacobian(self):
        try:
            import autograd.numpy as np, autograd as ag, GPy, matplotlib.pyplot as plt
+            from GPy.models import GradientChecker, GPRegression
        except:
            raise self.skipTest("autograd not available to check gradients")
        def k(X, X2, alpha=1., lengthscale=None):
@ -87,6 +88,20 @@ class MiscTests(unittest.TestCase):
        np.testing.assert_allclose(ke.gradients_X([[1.]], X, X2), dk(X, X2))
        np.testing.assert_allclose(ke.gradients_XX([[1.]], X, X2).sum(0), dkdk(X, X2))

+        m = GPRegression(self.X, self.Y)
+        def f(x):
+            m.X[:] = x
+            return m.log_likelihood()
+        def df(x):
+            m.X[:] = x
+            return m.kern.gradients_X(m.grad_dict['dL_dK'], X)
+        def ddf(x):
+            m.X[:] = x
+            return m.kern.gradients_XX(m.grad_dict['dL_dK'], X).sum(0)
+        gc = GradientChecker(f, df, self.X)
+        gc2 = GradientChecker(df, ddf, self.X)
+        assert(gc.checkgrad())
+        assert(gc2.checkgrad())

    def test_sparse_raw_predict(self):
        k = GPy.kern.RBF(1)