Added gibbs.py, although test is still failing.

This commit is contained in:
Neil Lawrence 2013-09-02 13:34:48 +01:00
parent 549f64892e
commit bd4c7f34ea
10 changed files with 223 additions and 33 deletions

View file

@ -69,39 +69,39 @@ def mlp(input_dim,variance=1., weight_variance=None,bias_variance=100.,ARD=False
part = parts.mlp.MLP(input_dim,variance,weight_variance,bias_variance,ARD)
return kern(input_dim, [part])
# def gibbs(input_dim,variance=1., mapping=None):
# """
# Gibbs and MacKay non-stationary covariance function.
def gibbs(input_dim,variance=1., mapping=None):
"""
Gibbs and MacKay non-stationary covariance function.
# .. math::
.. math::
# r = sqrt((x_i - x_j)'*(x_i - x_j))
r = sqrt((x_i - x_j)'*(x_i - x_j))
# k(x_i, x_j) = \sigma^2*Z*exp(-r^2/(l(x)*l(x) + l(x')*l(x')))
k(x_i, x_j) = \sigma^2*Z*exp(-r^2/(l(x)*l(x) + l(x')*l(x')))
# Z = \sqrt{2*l(x)*l(x')/(l(x)*l(x) + l(x')*l(x')}
Z = \sqrt{2*l(x)*l(x')/(l(x)*l(x) + l(x')*l(x')}
# where :math:`l(x)` is a function giving the length scale as a function of space.
# This is the non stationary kernel proposed by Mark Gibbs in his 1997
# thesis. It is similar to an RBF but has a length scale that varies
# with input location. This leads to an additional term in front of
# the kernel.
where :math:`l(x)` is a function giving the length scale as a function of space.
This is the non stationary kernel proposed by Mark Gibbs in his 1997
thesis. It is similar to an RBF but has a length scale that varies
with input location. This leads to an additional term in front of
the kernel.
# The parameters are :math:`\sigma^2`, the process variance, and the parameters of l(x) which is a function that can be specified by the user, by default an multi-layer peceptron is used is used.
The parameters are :math:`\sigma^2`, the process variance, and the parameters of l(x) which is a function that can be specified by the user, by default an multi-layer peceptron is used is used.
# :param input_dim: the number of input dimensions
# :type input_dim: int
# :param variance: the variance :math:`\sigma^2`
# :type variance: float
# :param mapping: the mapping that gives the lengthscale across the input space.
# :type mapping: GPy.core.Mapping
# :param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one weight variance parameter \sigma^2_w), otherwise there is one weight variance parameter per dimension.
# :type ARD: Boolean
# :rtype: Kernpart object
:param input_dim: the number of input dimensions
:type input_dim: int
:param variance: the variance :math:`\sigma^2`
:type variance: float
:param mapping: the mapping that gives the lengthscale across the input space.
:type mapping: GPy.core.Mapping
:param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one weight variance parameter \sigma^2_w), otherwise there is one weight variance parameter per dimension.
:type ARD: Boolean
:rtype: Kernpart object
# """
# part = parts.gibbs.Gibbs(input_dim,variance,mapping)
# return kern(input_dim, [part])
"""
part = parts.gibbs.Gibbs(input_dim,variance,mapping)
return kern(input_dim, [part])
def poly(input_dim,variance=1., weight_variance=None,bias_variance=1.,degree=2, ARD=False):
"""

View file

@ -547,10 +547,11 @@ class Kern_check_model(Model):
kernel = GPy.kern.rbf(1)
if X==None:
X = np.random.randn(num_samples, kernel.input_dim)
if X2==None:
X2 = np.random.randn(num_samples2, kernel.input_dim)
if dL_dK==None:
dL_dK = np.ones((X.shape[0], X2.shape[0]))
if X2==None:
dL_dK = np.ones((X.shape[0], X.shape[0]))
else:
dL_dK = np.ones((X.shape[0], X2.shape[0]))
self.kernel=kernel
self.X = X
@ -641,3 +642,28 @@ class Kern_check_dKdiag_dX(Kern_check_model):
def _set_params(self, x):
self.X=x.reshape(self.X.shape)
def kern_test(kern, X=None, X2=None, verbose=False):
"""This function runs on kernels to check the correctness of their implementation. It checks that the covariance function is positive definite for a randomly generated data set.
:param kern: the kernel to be tested.
:type kern: GPy.kern.Kernpart
:param X: X input values to test the covariance function.
:type X: ndarray
:param X2: X2 input values to test the covariance function.
:type X2: ndarray
"""
if X==None:
X = np.random.randn(10, kern.input_dim)
if X2==None:
X2 = np.random.randn(20, kern.input_dim)
result = [Kern_check_model(kern, X=X).is_positive_definite(),
Kern_check_dK_dtheta(kern, X=X, X2=X2).checkgrad(verbose=verbose),
Kern_check_dK_dtheta(kern, X=X, X2=None).checkgrad(verbose=verbose),
Kern_check_dKdiag_dtheta(kern, X=X).checkgrad(verbose=verbose),
Kern_check_dK_dX(kern, X=X, X2=X2).checkgrad(verbose=verbose),
Kern_check_dKdiag_dX(kern, X=X).checkgrad(verbose=verbose)]
# Need to check
#Kern_check_dK_dX(kern, X, X2=None).checkgrad(verbose=verbose)]
# but currently I think these aren't implemented.
return np.all(result)

135
GPy/kern/parts/gibbs.py Normal file
View file

@ -0,0 +1,135 @@
# Copyright (c) 2013, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import Kernpart
import numpy as np
from ...util.linalg import tdot
from ...core.mapping import Mapping
import GPy
class Gibbs(Kernpart):
"""
Gibbs and MacKay non-stationary covariance function.
.. math::
r = sqrt((x_i - x_j)'*(x_i - x_j))
k(x_i, x_j) = \sigma^2*Z*exp(-r^2/(l(x)*l(x) + l(x')*l(x')))
Z = (2*l(x)*l(x')/(l(x)*l(x) + l(x')*l(x')^{q/2}
where :math:`l(x)` is a function giving the length scale as a function of space and :math:`q` is the dimensionality of the input space.
This is the non stationary kernel proposed by Mark Gibbs in his 1997
thesis. It is similar to an RBF but has a length scale that varies
with input location. This leads to an additional term in front of
the kernel.
The parameters are :math:`\sigma^2`, the process variance, and the parameters of l(x) which is a function that can be specified by the user, by default an multi-layer peceptron is used is used.
:param input_dim: the number of input dimensions
:type input_dim: int
:param variance: the variance :math:`\sigma^2`
:type variance: float
:param mapping: the mapping that gives the lengthscale across the input space (by default GPy.mappings.MLP is used with 20 hidden nodes).
:type mapping: GPy.core.Mapping
:param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one weight variance parameter \sigma^2_w), otherwise there is one weight variance parameter per dimension.
:type ARD: Boolean
:rtype: Kernpart object
"""
def __init__(self, input_dim, variance=1., mapping=None, ARD=False):
self.input_dim = input_dim
self.ARD = ARD
if not mapping:
mapping = GPy.mappings.MLP(output_dim=1, hidden_dim=20, input_dim=input_dim)
if not ARD:
self.num_params=1+mapping.num_params
else:
raise NotImplementedError
self.mapping = mapping
self.name='gibbs'
self._set_params(np.hstack((variance, self.mapping._get_params())))
def _get_params(self):
return np.hstack((self.variance, self.mapping._get_params()))
def _set_params(self, x):
assert x.size == (self.num_params)
self.variance = x[0]
self.mapping._set_params(x[1:])
def _get_param_names(self):
return ['variance'] + self.mapping._get_param_names()
def K(self, X, X2, target):
"""Return covariance between X and X2."""
self._K_computations(X, X2)
target += self.variance*self._K_dvar
def Kdiag(self, X, target):
"""Compute the diagonal of the covariance matrix for X."""
np.add(target, self.variance, target)
def dK_dtheta(self, dL_dK, X, X2, target):
"""Derivative of the covariance with respect to the parameters."""
self._K_computations(X, X2)
self._dK_computations(dL_dK)
if X2==None:
gmapping = self.mapping.df_dtheta(2*self._dL_dl[:, None], X)
else:
gmapping = self.mapping.df_dtheta(self._dL_dl[:, None], X)
gmapping += self.mapping.df_dtheta(self._dL_dl_two[:, None], X2)
target+= np.hstack([(dL_dK*self._K_dvar).sum(), gmapping])
def dK_dX(self, dL_dK, X, X2, target):
"""Derivative of the covariance matrix with respect to X."""
# First account for gradients arising from presence of X in exponent.
self._K_computations(X, X2)
_K_dist = X[:, None, :] - X2[None, :, :]
dK_dX = (-2.*self.variance)*np.transpose((self._K_dvar/self._w2)[:, :, None]*_K_dist, (1, 0, 2))
target += np.sum(dK_dX*dL_dK.T[:, :, None], 0)
# Now account for gradients arising from presence of X in lengthscale.
self._dK_computations(dL_dK)
target += self.mapping.df_dX(self._dL_dl[:, None], X)
def dKdiag_dX(self, dL_dKdiag, X, target):
"""Gradient of diagonal of covariance with respect to X."""
pass
def dKdiag_dtheta(self, dL_dKdiag, X, target):
"""Gradient of diagonal of covariance with respect to parameters."""
pass
def _K_computations(self, X, X2=None):
"""Pre-computations for the covariance function (used both when computing the covariance and its gradients). Here self._dK_dvar and self._K_dist2 are updated."""
self._lengthscales=self.mapping.f(X)
self._lengthscales2=np.square(self._lengthscales)
if X2==None:
self._lengthscales_two = self._lengthscales
self._lengthscales_two2 = self._lengthscales2
Xsquare = np.square(X).sum(1)
self._K_dist2 = -2.*tdot(X) + Xsquare[:, None] + Xsquare[None, :]
else:
self._lengthscales_two = self.mapping.f(X2)
self._lengthscales_two2 = np.square(self._lengthscales_two)
self._K_dist2 = -2.*np.dot(X, X2.T) + np.square(X).sum(1)[:, None] + np.square(X2).sum(1)[None, :]
self._w2 = self._lengthscales2 + self._lengthscales_two2.T
prod_length = self._lengthscales*self._lengthscales_two.T
self._K_exponential = np.exp(-self._K_dist2/self._w2)
self._K_dvar = np.sign(prod_length)*(2*np.abs(prod_length)/self._w2)**(self.input_dim/2.)*np.exp(-self._K_dist2/self._w2)
def _dK_computations(self, dL_dK):
"""Pre-computations for the gradients of the covaraince function. Here the gradient of the covariance with respect to all the individual lengthscales is computed.
:param dL_dK: the gradient of the objective with respect to the covariance function.
:type dL_dK: ndarray"""
self._dL_dl = (dL_dK*self.variance*self._K_dvar*(self.input_dim/2.*(self._lengthscales_two.T**4 - self._lengthscales**4) + 2*self._lengthscales2*self._K_dist2)/(self._w2*self._w2*self._lengthscales)).sum(1)
if self._lengthscales_two is self._lengthscales:
self._dL_dl_two = None
else:
self._dL_dl_two = (dL_dK*self.variance*self._K_dvar*(self.input_dim/2.*(self._lengthscales**4 - self._lengthscales_two.T**4 ) + 2*self._lengthscales_two2.T*self._K_dist2)/(self._w2*self._w2*self._lengthscales_two.T)).sum(0)

View file

@ -103,7 +103,7 @@ class POLY(Kernpart):
"""Derivative of the covariance matrix with respect to X"""
self._K_computations(X, X2)
arg = self._K_poly_arg
target += self.weight_variance*self.degree*self.variance*(((X2[None,:, :])) *(arg**(self.degree-1))[:, :, None]* dL_dK[:, :, None]).sum(1)
target += self.weight_variance*self.degree*self.variance*(((X2[None,:, :])) *(arg**(self.degree-1))[:, :, None]*dL_dK[:, :, None]).sum(1)
def dKdiag_dX(self, dL_dKdiag, X, target):
"""Gradient of diagonal of covariance with respect to X"""

View file

@ -44,7 +44,7 @@ class White(Kernpart):
def dKdiag_dtheta(self,dL_dKdiag,X,target):
target += np.sum(dL_dKdiag)
def dK_dX(self,dL_dK,X,X2,target):
def dK_dX(self,dL_dK,X,X2,target):
pass
def dKdiag_dX(self,dL_dKdiag,X,target):

View file

@ -33,7 +33,7 @@ class Kernel(Mapping):
self.A = np.array((self.num_data, self.output_dim))
self.bias = np.array(self.output_dim)
self.randomize()
self.name = 'kernel'
def _get_param_names(self):
return sum([['A_%i_%i' % (n, d) for d in range(self.output_dim)] for n in range(self.num_data)], []) + ['bias_%i' % d for d in range(self.output_dim)]

View file

@ -20,6 +20,7 @@ class Linear(Mapping):
"""
def __init__(self, input_dim=1, output_dim=1):
self.name = 'linear'
Mapping.__init__(self, input_dim=input_dim, output_dim=output_dim)
self.num_params = self.output_dim*(self.input_dim + 1)
self.W = np.array((self.input_dim, self.output_dim))

View file

@ -26,6 +26,7 @@ class MLP(Mapping):
def __init__(self, input_dim=1, output_dim=1, hidden_dim=3):
Mapping.__init__(self, input_dim=input_dim, output_dim=output_dim)
self.name = 'mlp'
if isinstance(hidden_dim, int):
hidden_dim = [hidden_dim]
self.hidden_dim = hidden_dim

View file

@ -59,10 +59,22 @@ Why do likelihoods still have YYT everywhere, didn't we agree to set observed da
For some reason a stub of _get_param_names(self) wasn't available in the Parameterized base class. Have put it in (is this right?)
Is there a quick FAQ or something on how to build the documentation? I did it once, but can't remember!
Is there a quick FAQ or something on how to build the documentation? I did it once, but can't remember! Have started a FAQ.txt file where we can add this type of information.
Similar for the nosetests ... even ran them last week but can't remember the command!
Now added Gaussian priors to GPLVM latent variables by default. When running the GPy.examples.dimensionality_reduction.stick() example the print out from print model has the same value for the prior+likelihood as for the prior.
For the back constrained GP-LVM need priors to be on the Xs not on the model parameters (because they aren't parameters, they are constraints). Need to work out how to do this.
For the back constrained GP-LVM need priors to be on the Xs not on the model parameters (because they aren't parameters, they are constraints). Need to work out how to do this, perhaps by creating the full GP-LVM model then constraining around it, rather than overriding inside the GP-LVM model.
This code fails:
kern = GPy.kern.rbf(2)
GPy.kern.Kern_check_dK_dX(kern, X=np.random.randn(10, 2), X2=None).checkgrad(verbose=True)
because X2 is now equal to X, so there is a factor of 2 missing. Does this every come up? Yes, in the GP-LVM, (gplvm.py, line 64) where it is called with a corrective factor of 2! And on line 241 of sparse_gp where it is also called with a corrective factor of 2! In original matlab GPLVM, didn't allow gradients with respect to X alone, and multiplied by 2 in base code, but then add diagonal across those elements. This is missing in the new code.
In white.py, line 41, Need to check here if X and X2 refer to the same reference too ... becaue up the pipeline somewhere someone may have set X2=X when X2 arrived originally equal to None.

View file

@ -26,6 +26,21 @@ class KernelTests(unittest.TestCase):
self.assertTrue(GPy.kern.Kern_check_dKdiag_dtheta(kern).checkgrad(verbose=verbose))
self.assertTrue(GPy.kern.Kern_check_dK_dX(kern).checkgrad(verbose=verbose))
def test_gibbskernel(self):
verbose = False
kern = GPy.kern.gibbs(5, mapping=GPy.mappings.Linear(5, 1))
self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
def test_mlpkernel(self):
verbose = False
kern = GPy.kern.mlp(5)
self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
def test_polykernel(self):
verbose = False
kern = GPy.kern.poly(5, degree=4)
self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
def test_fixedkernel(self):
"""
Fixed effect kernel test