[doc] some changes to the doc, using mathjax some additions in math

This commit is contained in:
Max Zwiessele 2015-09-08 14:01:54 +01:00
parent bcc1e7c8d4
commit 6996912184
11 changed files with 244 additions and 121 deletions

View file

@ -74,7 +74,7 @@ class Parameterized(Parameterizable):
# Metaclass for parameters changed after init.
# This makes sure, that parameters changed will always be called after __init__
# **Never** call parameters_changed() yourself
#This is ignored in Python 3 -- you need to put the meta class in the function definition.
#This is ignored in Python 3 -- you need to put the meta class in the function definition.
#__metaclass__ = ParametersChangedMeta
#The six module is used to support both Python 2 and 3 simultaneously
#===========================================================================
@ -316,7 +316,7 @@ class Parameterized(Parameterizable):
param[:] = val; return
except AttributeError:
pass
object.__setattr__(self, name, val);
return object.__setattr__(self, name, val);
#===========================================================================
# Pickling

View file

@ -70,6 +70,9 @@ class Kern(Parameterized):
"""
Compute the kernel function.
.. math::
K_{ij} = k(X_i, X_j)
:param X: the first set of inputs to the kernel
:param X2: (optional) the second set of arguments to the kernel. If X2
is None, this is passed through to the 'part' object, which
@ -77,24 +80,64 @@ class Kern(Parameterized):
"""
raise NotImplementedError
def Kdiag(self, X):
    r"""
    Diagonal of the kernel matrix K evaluated at X.

    .. math::
        Kdiag_{i} = k(X_i, X_i)

    This base implementation only raises; concrete kernels are presumably
    expected to override it.

    :param X: the inputs at which the diagonal is requested
    :raises NotImplementedError: always
    """
    raise NotImplementedError()
def psi0(self, Z, variational_posterior):
    r"""
    Return the psi0 statistic as computed by ``self.psicomp``.

    .. math::
        \psi_0 = \sum_{i=0}^{n}E_{q(X)}[k(X_i, X_i)]

    :param Z: forwarded unchanged to ``psicomp.psicomputations``
    :param variational_posterior: forwarded unchanged to
        ``psicomp.psicomputations``
    :returns: element 0 of the ``psicomputations`` result
    """
    psi_statistics = self.psicomp.psicomputations(self, Z, variational_posterior)
    return psi_statistics[0]
def psi1(self, Z, variational_posterior):
    r"""
    Return the psi1 statistic as computed by ``self.psicomp``.

    .. math::
        \psi_1^{n,m} = E_{q(X)}[k(X_n, Z_m)]

    :param Z: forwarded unchanged to ``psicomp.psicomputations``
    :param variational_posterior: forwarded unchanged to
        ``psicomp.psicomputations``
    :returns: element 1 of the ``psicomputations`` result
    """
    psi_statistics = self.psicomp.psicomputations(self, Z, variational_posterior)
    return psi_statistics[1]
def psi2(self, Z, variational_posterior):
    r"""
    Return the psi2 statistic as computed by ``self.psicomp``.

    .. math::
        \psi_2^{m,m'} = \sum_{i=0}^{n}E_{q(X)}[ k(Z_m, X_i) k(X_i, Z_{m'})]

    :param Z: forwarded unchanged to ``psicomp.psicomputations``
    :param variational_posterior: forwarded unchanged to
        ``psicomp.psicomputations``
    :returns: element 2 of the ``psicomputations`` result, requested with
        ``return_psi2_n=False``
    """
    psi_statistics = self.psicomp.psicomputations(
        self, Z, variational_posterior, return_psi2_n=False
    )
    return psi_statistics[2]
def psi2n(self, Z, variational_posterior):
    r"""
    Return the per-datum psi2 statistic as computed by ``self.psicomp``.

    .. math::
        \psi_2^{n,m,m'} = E_{q(X)}[ k(Z_m, X_n) k(X_n, Z_{m'})]

    Thus, we do not sum out n, compared to psi2.

    :param Z: forwarded unchanged to ``psicomp.psicomputations``
    :param variational_posterior: forwarded unchanged to
        ``psicomp.psicomputations``
    :returns: element 2 of the ``psicomputations`` result, requested with
        ``return_psi2_n=True``
    """
    psi_statistics = self.psicomp.psicomputations(
        self, Z, variational_posterior, return_psi2_n=True
    )
    return psi_statistics[2]
def gradients_X(self, dL_dK, X, X2):
    r"""
    Gradient of the objective with respect to the inputs X.

    .. math::
        \frac{\partial L}{\partial X} = \frac{\partial L}{\partial K}\frac{\partial K}{\partial X}

    This base implementation only raises; concrete kernels are presumably
    expected to override it.

    :param dL_dK: derivative of the objective with respect to K
    :param X: the first set of inputs
    :param X2: the second set of inputs
    :raises NotImplementedError: always
    """
    raise NotImplementedError()
def gradients_X_X2(self, dL_dK, X, X2):
    """
    Return the gradients with respect to both input sets as a pair.

    The first entry is ``gradients_X(dL_dK, X, X2)``; the second swaps the
    roles of the inputs and uses the transpose,
    ``gradients_X(dL_dK.T, X2, X)``.

    :param dL_dK: derivative of the objective with respect to K; must
        expose a ``.T`` attribute
    :param X: the first set of inputs
    :param X2: the second set of inputs
    :returns: tuple ``(gradient for X, gradient for X2)``
    """
    wrt_first = self.gradients_X(dL_dK, X, X2)
    wrt_second = self.gradients_X(dL_dK.T, X2, X)
    return wrt_first, wrt_second
def gradients_XX(self, dL_dK, X, X2):
    r"""
    Second derivative of the objective with respect to X and X2.

    .. math::
        \frac{\partial^2 L}{\partial X\partial X_2} = \frac{\partial L}{\partial K}\frac{\partial^2 K}{\partial X\partial X_2}

    This base implementation only raises; concrete kernels are presumably
    expected to override it.

    :param dL_dK: derivative of the objective with respect to K
    :param X: the first set of inputs
    :param X2: the second set of inputs
    :raises NotImplementedError: always, carrying an explanatory message
    """
    # Instantiate the exception with its message. The previous form,
    # ``raise(NotImplementedError, "...")``, raised a tuple, which on
    # Python 3 fails with TypeError instead of NotImplementedError.
    raise NotImplementedError("This is the second derivative of K wrt X and X2, and not implemented for this kernel")
def gradients_XX_diag(self, dL_dKdiag, X):
    """
    The diagonal of the second derivative w.r.t. X and X2.

    This base implementation only raises; concrete kernels are presumably
    expected to override it.

    :param dL_dKdiag: derivative of the objective with respect to the
        diagonal of K
    :param X: the inputs
    :raises NotImplementedError: always, carrying an explanatory message
    """
    # Instantiate the exception with its message. The previous form,
    # ``raise(NotImplementedError, "...")``, raised a tuple, which on
    # Python 3 fails with TypeError instead of NotImplementedError.
    raise NotImplementedError("This is the diagonal of the second derivative of K wrt X and X2, and not implemented for this kernel")
def gradients_X_diag(self, dL_dKdiag, X):
    """
    The diagonal of the derivative w.r.t. X.

    This base implementation only raises; concrete kernels are presumably
    expected to override it.

    :param dL_dKdiag: derivative of the objective with respect to the
        diagonal of K
    :param X: the inputs
    :raises NotImplementedError: always
    """
    raise NotImplementedError()
def update_gradients_diag(self, dL_dKdiag, X):
@ -110,11 +153,17 @@ class Kern(Parameterized):
Set the gradients of all parameters when doing inference with
uncertain inputs, using expectations of the kernel.
The esential maths is
The essential maths is
dL_d{theta_i} = dL_dpsi0 * dpsi0_d{theta_i} +
dL_dpsi1 * dpsi1_d{theta_i} +
dL_dpsi2 * dpsi2_d{theta_i}
.. math::
\\frac{\partial L}{\partial \\theta_i} & = \\frac{\partial L}{\partial \psi_0}\\frac{\partial \psi_0}{\partial \\theta_i}\\\\
& \quad + \\frac{\partial L}{\partial \psi_1}\\frac{\partial \psi_1}{\partial \\theta_i}\\\\
& \quad + \\frac{\partial L}{\partial \psi_2}\\frac{\partial \psi_2}{\partial \\theta_i}
Thus, we push the different derivatives through the gradients of the psi
statistics. Be sure to set the gradients for all kernel
parameters here.
"""
dtheta = self.psicomp.psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior)[0]
self.gradient[:] = dtheta

View file

@ -1,7 +1,11 @@
'''
Created on 11 Mar 2014
@author: maxz
@author: @mzwiessele
This module provides a meta class for the kernels. The meta class is for
slicing the inputs (X, X2) for the kernels, before K (or any other method involving X)
gets called. The `active_dims` of a kernel decide which dimensions the kernel works on.
'''
from ...core.parameterization.parameterized import ParametersChangedMeta
import numpy as np

View file

@ -17,7 +17,7 @@ class Linear(Kern):
.. math::
k(x,y) = \sum_{i=1}^input_dim \sigma^2_i x_iy_i
k(x,y) = \sum_{i=1}^{\\text{input_dim}} \sigma^2_i x_iy_i
:param input_dim: the number of input dimensions
:type input_dim: int

View file

@ -25,13 +25,16 @@ class Stationary(Kern):
Stationary covariance functions depend only on r, where r is defined as
r = \sqrt{ \sum_{q=1}^Q (x_q - x'_q)^2 }
.. math::
r(x, x') = \\sqrt{ \\sum_{q=1}^Q (x_q - x'_q)^2 }
The covariance function k(x, x') can then be written k(r).
In this implementation, r is scaled by the lengthscales parameter(s):
r = \sqrt{ \sum_{q=1}^Q \frac{(x_q - x'_q)^2}{\ell_q^2} }.
.. math::
r(x, x') = \\sqrt{ \\sum_{q=1}^Q \\frac{(x_q - x'_q)^2}{\ell_q^2} }.
By default, there's only one lengthscale: separate lengthscales for each
dimension can be enabled by setting ARD=True.
@ -39,11 +42,12 @@ class Stationary(Kern):
To implement a stationary covariance function using this class, one need
only define the covariance function k(r), and its derivative.
...
def K_of_r(self, r):
return foo
def dK_dr(self, r):
return bar
```
def K_of_r(self, r):
return foo
def dK_dr(self, r):
return bar
```
The lengthscale(s) and variance parameters are added to the structure automatically.
@ -128,7 +132,8 @@ class Stationary(Kern):
"""
Efficiently compute the scaled distance, r.
r = \sqrt( \sum_{q=1}^Q (x_q - x'q)^2/l_q^2 )
.. math::
r = \\sqrt{ \\sum_{q=1}^Q \\frac{(x_q - x'_q)^2}{\ell_q^2} }
Note that if there is only one lengthscale, l comes outside the sum. In
this case we compute the unscaled distance first (in a separate
@ -321,7 +326,7 @@ class OU(Stationary):
.. math::
k(r) = \\sigma^2 \exp(- r) \\ \\ \\ \\ \\text{ where } r = \sqrt{\sum_{i=1}^input_dim \\frac{(x_i-y_i)^2}{\ell_i^2} }
k(r) = \\sigma^2 \exp(- r) \\ \\ \\ \\ \\text{ where } r = \sqrt{\sum_{i=1}^{\\text{input_dim}} \\frac{(x_i-y_i)^2}{\ell_i^2} }
"""
@ -341,7 +346,7 @@ class Matern32(Stationary):
.. math::
k(r) = \\sigma^2 (1 + \\sqrt{3} r) \exp(- \sqrt{3} r) \\ \\ \\ \\ \\text{ where } r = \sqrt{\sum_{i=1}^input_dim \\frac{(x_i-y_i)^2}{\ell_i^2} }
k(r) = \\sigma^2 (1 + \\sqrt{3} r) \exp(- \sqrt{3} r) \\ \\ \\ \\ \\text{ where } r = \sqrt{\sum_{i=1}^{\\text{input_dim}} \\frac{(x_i-y_i)^2}{\ell_i^2} }
"""
@ -388,7 +393,7 @@ class Matern52(Stationary):
.. math::
k(r) = \sigma^2 (1 + \sqrt{5} r + \\frac53 r^2) \exp(- \sqrt{5} r)
"""
"""
def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='Mat52'):
super(Matern52, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name)

View file

@ -15,7 +15,7 @@ class TruncLinear(Kern):
.. math::
k(x,y) = \sum_{i=1}^input_dim \sigma^2_i \max(0, x_iy_i - \simga_q)
k(x,y) = \sum_{i=1}^{\\text{input_dim}} \sigma^2_i \max(0, x_iy_i - \sigma_q)
:param input_dim: the number of input dimensions
:type input_dim: int
@ -54,7 +54,7 @@ class TruncLinear(Kern):
self.delta = Param('delta', delta)
self.add_parameter(self.variances)
self.add_parameter(self.delta)
@Cache_this(limit=2)
def K(self, X, X2=None):
XX = self.variances*self._product(X, X2)
@ -114,7 +114,7 @@ class TruncLinear_inf(Kern):
.. math::
k(x,y) = \sum_{i=1}^input_dim \sigma^2_i \max(0, x_iy_i - \simga_q)
k(x,y) = \sum_{i=1}^{\\text{input_dim}} \sigma^2_i \max(0, x_iy_i - \sigma_q)
:param input_dim: the number of input dimensions
:type input_dim: int
@ -148,8 +148,8 @@ class TruncLinear_inf(Kern):
self.variances = Param('variances', variances, Logexp())
self.add_parameter(self.variances)
# @Cache_this(limit=2)
def K(self, X, X2=None):
tmp = self._product(X, X2)

View file

@ -88,7 +88,7 @@ class Test(unittest.TestCase):
k.randomize()
p = Parabola(.3)
p.randomize()
Y = p.f(X) + np.random.multivariate_normal(np.zeros(X.shape[0]), k.K(X))[:,None] + np.random.normal(0, .1, (X.shape[0], 1))
Y = p.f(X) + np.random.multivariate_normal(np.zeros(X.shape[0]), k.K(X)+np.eye(X.shape[0])*1e-8)[:,None] + np.random.normal(0, .1, (X.shape[0], 1))
m = GPy.models.GPRegression(X, Y, mean_function=p)
m.randomize()
assert(m.checkgrad())

View file

@ -62,6 +62,7 @@ class MiscTests(unittest.TestCase):
def check_jacobian(self):
try:
import autograd.numpy as np, autograd as ag, GPy, matplotlib.pyplot as plt
from GPy.models import GradientChecker, GPRegression
except:
raise self.skipTest("autograd not available to check gradients")
def k(X, X2, alpha=1., lengthscale=None):
@ -87,6 +88,20 @@ class MiscTests(unittest.TestCase):
np.testing.assert_allclose(ke.gradients_X([[1.]], X, X2), dk(X, X2))
np.testing.assert_allclose(ke.gradients_XX([[1.]], X, X2).sum(0), dkdk(X, X2))
m = GPRegression(self.X, self.Y)
def f(x):
m.X[:] = x
return m.log_likelihood()
def df(x):
m.X[:] = x
return m.kern.gradients_X(m.grad_dict['dL_dK'], X)
def ddf(x):
m.X[:] = x
return m.kern.gradients_XX(m.grad_dict['dL_dK'], X).sum(0)
gc = GradientChecker(f, df, self.X)
gc2 = GradientChecker(df, ddf, self.X)
assert(gc.checkgrad())
assert(gc2.checkgrad())
def test_sparse_raw_predict(self):
k = GPy.kern.RBF(1)