Merge branch 'devel' of github.com:SheffieldML/GPy into devel

Conflicts:
	GPy/examples/regression.py
	GPy/kern/constructors.py
	GPy/testing/kernel_tests.py
	GPy/util/multioutput.py
This commit is contained in:
Ricardo 2013-09-16 12:32:42 +01:00
commit 603aa18482
25 changed files with 400 additions and 72 deletions

View file

@ -5,7 +5,6 @@ import numpy as np
from kern import kern
import parts
def rbf_inv(input_dim,variance=1., inv_lengthscale=None,ARD=False):
"""
Construct an RBF kernel
@ -103,6 +102,12 @@ def gibbs(input_dim,variance=1., mapping=None):
part = parts.gibbs.Gibbs(input_dim,variance,mapping)
return kern(input_dim, [part])
def hetero(input_dim, mapping=None, transform=None):
"""
"""
part = parts.hetero.Hetero(input_dim,mapping,transform)
return kern(input_dim, [part])
def poly(input_dim,variance=1., weight_variance=None,bias_variance=1.,degree=2, ARD=False):
"""
Construct a polynomial kernel
@ -135,6 +140,7 @@ def white(input_dim,variance=1.):
part = parts.white.White(input_dim,variance)
return kern(input_dim, [part])
def exponential(input_dim,variance=1., lengthscale=None, ARD=False):
"""
Construct an exponential kernel

View file

@ -12,7 +12,9 @@ from matplotlib.transforms import offset_copy
class kern(Parameterized):
def __init__(self, input_dim, parts=[], input_slices=None):
"""
This is the main kernel class for GPy. It handles multiple (additive) kernel functions, and keeps track of variaous things like which parameters live where.
This is the main kernel class for GPy. It handles multiple
(additive) kernel functions, and keeps track of various things
like which parameters live where.
The technical code for kernels is divided into _parts_ (see
e.g. rbf.py). This object contains a list of parts, which are
@ -33,6 +35,11 @@ class kern(Parameterized):
self.input_dim = input_dim
part_names = [k.name for k in self.parts]
self.name=''
for name in part_names:
self.name += name + '+'
self.name = self.name[:-1]
# deal with input_slices
if input_slices is None:
self.input_slices = [slice(None) for p in self.parts]
@ -333,10 +340,8 @@ class kern(Parameterized):
:type X: np.ndarray (num_samples x input_dim)
:param X2: Observed data inputs (optional, defaults to X)
:type X2: np.ndarray (num_inducing x input_dim)"""
if X2 is None:
X2 = X
target = np.zeros_like(X)
if X2 is None:
if X2 is None:
[p.dK_dX(dL_dK, X[:, i_s], None, target[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)]
else:
[p.dK_dX(dL_dK, X[:, i_s], X2[:, i_s], target[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)]
@ -653,17 +658,85 @@ def kern_test(kern, X=None, X2=None, verbose=False):
:param X2: X2 input values to test the covariance function.
:type X2: ndarray
"""
pass_checks = True
if X==None:
X = np.random.randn(10, kern.input_dim)
if X2==None:
X2 = np.random.randn(20, kern.input_dim)
result = [Kern_check_model(kern, X=X).is_positive_definite(),
Kern_check_dK_dtheta(kern, X=X, X2=X2).checkgrad(verbose=verbose),
Kern_check_dK_dtheta(kern, X=X, X2=None).checkgrad(verbose=verbose),
Kern_check_dKdiag_dtheta(kern, X=X).checkgrad(verbose=verbose),
Kern_check_dK_dX(kern, X=X, X2=X2).checkgrad(verbose=verbose),
Kern_check_dKdiag_dX(kern, X=X).checkgrad(verbose=verbose)]
# Need to check
#Kern_check_dK_dX(kern, X, X2=None).checkgrad(verbose=verbose)]
# but currently I think these aren't implemented.
return np.all(result)
if verbose:
print("Checking covariance function is positive definite.")
result = Kern_check_model(kern, X=X).is_positive_definite()
if result and verbose:
print("Check passed.")
if not result:
print("Positive definite check failed for " + kern.name + " covariance function.")
pass_checks = False
return False
if verbose:
print("Checking gradients of K(X, X) wrt theta.")
result = Kern_check_dK_dtheta(kern, X=X, X2=None).checkgrad(verbose=verbose)
if result and verbose:
print("Check passed.")
if not result:
print("Gradient of K(X, X) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:")
Kern_check_dK_dtheta(kern, X=X, X2=None).checkgrad(verbose=True)
pass_checks = False
return False
if verbose:
print("Checking gradients of K(X, X2) wrt theta.")
result = Kern_check_dK_dtheta(kern, X=X, X2=X2).checkgrad(verbose=verbose)
if result and verbose:
print("Check passed.")
if not result:
print("Gradient of K(X, X) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:")
Kern_check_dK_dtheta(kern, X=X, X2=X2).checkgrad(verbose=True)
pass_checks = False
return False
if verbose:
print("Checking gradients of Kdiag(X) wrt theta.")
result = Kern_check_dKdiag_dtheta(kern, X=X).checkgrad(verbose=verbose)
if result and verbose:
print("Check passed.")
if not result:
print("Gradient of Kdiag(X) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:")
Kern_check_dKdiag_dtheta(kern, X=X).checkgrad(verbose=True)
pass_checks = False
return False
if verbose:
print("Checking gradients of K(X, X) wrt X.")
result = Kern_check_dK_dX(kern, X=X, X2=None).checkgrad(verbose=verbose)
if result and verbose:
print("Check passed.")
if not result:
print("Gradient of K(X, X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:")
Kern_check_dK_dX(kern, X=X, X2=None).checkgrad(verbose=True)
pass_checks = False
return False
if verbose:
print("Checking gradients of K(X, X2) wrt X.")
result = Kern_check_dK_dX(kern, X=X, X2=X2).checkgrad(verbose=verbose)
if result and verbose:
print("Check passed.")
if not result:
print("Gradient of K(X, X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:")
Kern_check_dK_dX(kern, X=X, X2=X2).checkgrad(verbose=True)
pass_checks = False
return False
if verbose:
print("Checking gradients of Kdiag(X) wrt X.")
result = Kern_check_dKdiag_dX(kern, X=X).checkgrad(verbose=verbose)
if result and verbose:
print("Check passed.")
if not result:
print("Gradient of Kdiag(X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:")
Kern_check_dKdiag_dX(kern, X=X).checkgrad(verbose=True)
pass_checks = False
return False
return pass_checks

View file

@ -98,9 +98,13 @@ class Matern32(Kernpart):
def dK_dX(self, dL_dK, X, X2, target):
"""derivative of the covariance matrix with respect to X."""
if X2 is None: X2 = X
dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1))[:, :, None]
ddist_dX = (X[:, None, :] - X2[None, :, :]) / self.lengthscale ** 2 / np.where(dist != 0., dist, np.inf)
if X2 is None:
dist = np.sqrt(np.sum(np.square((X[:, None, :] - X[None, :, :]) / self.lengthscale), -1))[:, :, None]
ddist_dX = 2*(X[:, None, :] - X[None, :, :]) / self.lengthscale ** 2 / np.where(dist != 0., dist, np.inf)
else:
dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1))[:, :, None]
ddist_dX = (X[:, None, :] - X2[None, :, :]) / self.lengthscale ** 2 / np.where(dist != 0., dist, np.inf)
dK_dX = -np.transpose(3 * self.variance * dist * np.exp(-np.sqrt(3) * dist) * ddist_dX, (1, 0, 2))
target += np.sum(dK_dX * dL_dK.T[:, :, None], 0)

View file

@ -98,9 +98,12 @@ class Matern52(Kernpart):
def dK_dX(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to X."""
if X2 is None: X2 = X
dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1))[:,:,None]
ddist_dX = (X[:,None,:]-X2[None,:,:])/self.lengthscale**2/np.where(dist!=0.,dist,np.inf)
if X2 is None:
dist = np.sqrt(np.sum(np.square((X[:,None,:]-X[None,:,:])/self.lengthscale),-1))[:,:,None]
ddist_dX = 2*(X[:,None,:]-X[None,:,:])/self.lengthscale**2/np.where(dist!=0.,dist,np.inf)
else:
dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1))[:,:,None]
ddist_dX = (X[:,None,:]-X2[None,:,:])/self.lengthscale**2/np.where(dist!=0.,dist,np.inf)
dK_dX = - np.transpose(self.variance*5./3*dist*(1+np.sqrt(5)*dist)*np.exp(-np.sqrt(5)*dist)*ddist_dX,(1,0,2))
target += np.sum(dK_dX*dL_dK.T[:,:,None],0)

View file

@ -5,6 +5,8 @@ import exponential
import finite_dimensional
import fixed
import gibbs
#import hetero #hetero.py is not commited: omitting for now. JH.
import hierarchical
import independent_outputs
import linear
import Matern32
@ -19,8 +21,7 @@ import prod
import rational_quadratic
import rbfcos
import rbf
import rbf_inv
import spline
import symmetric
import white
import hierarchical
import rbf_inv

View file

@ -9,17 +9,18 @@ from scipy import weave
class Coregionalize(Kernpart):
"""
Kernel for intrinsic/linear coregionalization models
Covariance function for intrinsic/linear coregionalization models
This kernel has the form
This covariance has the form
.. math::
\mathbf{B} = \mathbf{W}\mathbf{W}^\top + kappa \mathbf{I}
An intrinsic/linear coregionalization kernel of the form
An intrinsic/linear coregionalization covariance function of the form
.. math::
k_2(x, y)=\mathbf{B} k(x, y)
it is obtainded as the tensor product between a kernel k(x,y) and B.
it is obtained as the tensor product between a covariance function
k(x,y) and B.
:param num_outputs: number of outputs to coregionalize
:type num_outputs: int

View file

@ -9,7 +9,7 @@ import GPy
class Gibbs(Kernpart):
"""
Gibbs and MacKay non-stationary covariance function.
Gibbs non-stationary covariance function.
.. math::
@ -25,7 +25,10 @@ class Gibbs(Kernpart):
with input location. This leads to an additional term in front of
the kernel.
The parameters are :math:`\sigma^2`, the process variance, and the parameters of l(x) which is a function that can be specified by the user, by default an multi-layer peceptron is used is used.
The parameters are :math:`\sigma^2`, the process variance, and
the parameters of l(x) which is a function that can be
specified by the user, by default an multi-layer peceptron is
used.
:param input_dim: the number of input dimensions
:type input_dim: int
@ -37,6 +40,15 @@ class Gibbs(Kernpart):
:type ARD: Boolean
:rtype: Kernpart object
See Mark Gibbs's thesis for more details: Gibbs,
M. N. (1997). Bayesian Gaussian Processes for Regression and
Classification. PhD thesis, Department of Physics, University of
Cambridge. Or also see Page 93 of Gaussian Processes for Machine
Learning by Rasmussen and Williams. Although note that we do not
constrain the lengthscale to be positive by default. This allows
anticorrelation to occur. The positive constraint can be included
by the user manually.
"""
def __init__(self, input_dim, variance=1., mapping=None, ARD=False):
@ -89,12 +101,18 @@ class Gibbs(Kernpart):
"""Derivative of the covariance matrix with respect to X."""
# First account for gradients arising from presence of X in exponent.
self._K_computations(X, X2)
_K_dist = X[:, None, :] - X2[None, :, :]
if X2 is None:
_K_dist = 2*(X[:, None, :] - X[None, :, :])
else:
_K_dist = X[:, None, :] - X2[None, :, :] # don't cache this in _K_co
dK_dX = (-2.*self.variance)*np.transpose((self._K_dvar/self._w2)[:, :, None]*_K_dist, (1, 0, 2))
target += np.sum(dK_dX*dL_dK.T[:, :, None], 0)
# Now account for gradients arising from presence of X in lengthscale.
self._dK_computations(dL_dK)
target += self.mapping.df_dX(self._dL_dl[:, None], X)
if X2 is None:
target += 2.*self.mapping.df_dX(self._dL_dl[:, None], X)
else:
target += self.mapping.df_dX(self._dL_dl[:, None], X)
def dKdiag_dX(self, dL_dKdiag, X, target):
"""Gradient of diagonal of covariance with respect to X."""
@ -102,7 +120,8 @@ class Gibbs(Kernpart):
def dKdiag_dtheta(self, dL_dKdiag, X, target):
"""Gradient of diagonal of covariance with respect to parameters."""
pass
target[0] += np.sum(dL_dKdiag)
def _K_computations(self, X, X2=None):

101
GPy/kern/parts/hetero.py Normal file
View file

@ -0,0 +1,101 @@
# Copyright (c) 2013, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from IPython.core.debugger import Tracer; debug_here=Tracer()
from kernpart import Kernpart
import numpy as np
from ...util.linalg import tdot
from ...core.mapping import Mapping
import GPy
class Hetero(Kernpart):
"""
TODO: Need to constrain the function outputs positive (still thinking of best way of doing this!!! Yes, intend to use transformations, but what's the *best* way). Currently just squaring output.
Heteroschedastic noise which depends on input location. See, for example, this paper by Goldberg et al.
.. math::
k(x_i, x_j) = \delta_{i,j} \sigma^2(x_i)
where :math:`\sigma^2(x)` is a function giving the variance as a function of input space and :math:`\delta_{i,j}` is the Kronecker delta function.
The parameters are the parameters of \sigma^2(x) which is a
function that can be specified by the user, by default an
multi-layer peceptron is used.
:param input_dim: the number of input dimensions
:type input_dim: int
:param mapping: the mapping that gives the lengthscale across the input space (by default GPy.mappings.MLP is used with 20 hidden nodes).
:type mapping: GPy.core.Mapping
:rtype: Kernpart object
See this paper:
Goldberg, P. W. Williams, C. K. I. and Bishop,
C. M. (1998) Regression with Input-dependent Noise: a Gaussian
Process Treatment In Advances in Neural Information Processing
Systems, Volume 10, pp. 493-499. MIT Press
for a Gaussian process treatment of this problem.
"""
def __init__(self, input_dim, mapping=None, transform=None):
self.input_dim = input_dim
if not mapping:
mapping = GPy.mappings.MLP(output_dim=1, hidden_dim=20, input_dim=input_dim)
if not transform:
transform = GPy.core.transformations.logexp()
self.transform = transform
self.mapping = mapping
self.name='hetero'
self.num_params=self.mapping.num_params
self._set_params(self.mapping._get_params())
def _get_params(self):
return self.mapping._get_params()
def _set_params(self, x):
assert x.size == (self.num_params)
self.mapping._set_params(x)
def _get_param_names(self):
return self.mapping._get_param_names()
def K(self, X, X2, target):
"""Return covariance between X and X2."""
if X2==None or X2 is X:
target[np.diag_indices_from(target)] += self._Kdiag(X)
def Kdiag(self, X, target):
"""Compute the diagonal of the covariance matrix for X."""
target+=self._Kdiag(X)
def _Kdiag(self, X):
"""Helper function for computing the diagonal elements of the covariance."""
return self.mapping.f(X).flatten()**2
def dK_dtheta(self, dL_dK, X, X2, target):
"""Derivative of the covariance with respect to the parameters."""
if X2==None or X2 is X:
dL_dKdiag = dL_dK.flat[::dL_dK.shape[0]+1]
self.dKdiag_dtheta(dL_dKdiag, X, target)
def dKdiag_dtheta(self, dL_dKdiag, X, target):
"""Gradient of diagonal of covariance with respect to parameters."""
target += 2.*self.mapping.df_dtheta(dL_dKdiag[:, None], X)*self.mapping.f(X)
def dK_dX(self, dL_dK, X, X2, target):
"""Derivative of the covariance matrix with respect to X."""
if X2==None or X2 is X:
dL_dKdiag = dL_dK.flat[::dL_dK.shape[0]+1]
self.dKdiag_dX(dL_dKdiag, X, target)
def dKdiag_dX(self, dL_dKdiag, X, target):
"""Gradient of diagonal of covariance with respect to X."""
target += 2.*self.mapping.df_dX(dL_dKdiag[:, None], X)*self.mapping.f(X)

View file

@ -59,6 +59,45 @@ class Kernpart(object):
def dK_dX(self, dL_dK, X, X2, target):
raise NotImplementedError
class Kernpart_stationary(Kernpart):
def __init__(self, input_dim, lengthscale=None, ARD=False):
self.input_dim = input_dim
self.ARD = ARD
if not ARD:
self.num_params = 2
if lengthscale is not None:
self.lengthscale = np.asarray(lengthscale)
assert self.lengthscale.size == 1, "Only one lengthscale needed for non-ARD kernel"
else:
self.lengthscale = np.ones(1)
else:
self.num_params = self.input_dim + 1
if lengthscale is not None:
self.lengthscale = np.asarray(lengthscale)
assert self.lengthscale.size == self.input_dim, "bad number of lengthscales"
else:
self.lengthscale = np.ones(self.input_dim)
# initialize cache
self._Z, self._mu, self._S = np.empty(shape=(3, 1))
self._X, self._X2, self._params = np.empty(shape=(3, 1))
def _set_params(self, x):
self.lengthscale = x
self.lengthscale2 = np.square(self.lengthscale)
# reset cached results
self._X, self._X2, self._params = np.empty(shape=(3, 1))
self._Z, self._mu, self._S = np.empty(shape=(3, 1)) # cached versions of Z,mu,S
def dKdiag_dtheta(self, dL_dKdiag, X, target):
# For stationary covariances, derivative of diagonal elements
# wrt lengthscale is 0.
target[0] += np.sum(dL_dKdiag)
class Kernpart_inner(Kernpart):
def __init__(self,input_dim):
"""
@ -74,3 +113,5 @@ class Kernpart_inner(Kernpart):
# initialize cache
self._Z, self._mu, self._S = np.empty(shape=(3, 1))
self._X, self._X2, self._params = np.empty(shape=(3, 1))

View file

@ -99,7 +99,10 @@ class Linear(Kernpart):
target += tmp.sum()
def dK_dX(self, dL_dK, X, X2, target):
target += (((X2[None,:, :] * self.variances)) * dL_dK[:, :, None]).sum(1)
if X2 is None:
target += 2*(((X[None,:, :] * self.variances)) * dL_dK[:, :, None]).sum(1)
else:
target += (((X2[None,:, :] * self.variances)) * dL_dK[:, :, None]).sum(1)
def dKdiag_dX(self,dL_dKdiag,X,target):
target += 2.*self.variances*dL_dKdiag[:,None]*X

View file

@ -110,9 +110,13 @@ class MLP(Kernpart):
arg = self._K_asin_arg
numer = self._K_numer
denom = self._K_denom
vec2 = (X2*X2).sum(1)*self.weight_variance + self.bias_variance + 1.
denom3 = denom*denom*denom
target += four_over_tau*self.weight_variance*self.variance*((X2[None, :, :]/denom[:, :, None] - vec2[None, :, None]*X[:, None, :]*(numer/denom3)[:, :, None])*(dL_dK/np.sqrt(1-arg*arg))[:, :, None]).sum(1)
if X2 is not None:
vec2 = (X2*X2).sum(1)*self.weight_variance+self.bias_variance + 1.
target += four_over_tau*self.weight_variance*self.variance*((X2[None, :, :]/denom[:, :, None] - vec2[None, :, None]*X[:, None, :]*(numer/denom3)[:, :, None])*(dL_dK/np.sqrt(1-arg*arg))[:, :, None]).sum(1)
else:
vec = (X*X).sum(1)*self.weight_variance+self.bias_variance + 1.
target += 2*four_over_tau*self.weight_variance*self.variance*((X[None, :, :]/denom[:, :, None] - vec[None, :, None]*X[:, None, :]*(numer/denom3)[:, :, None])*(dL_dK/np.sqrt(1-arg*arg))[:, :, None]).sum(1)
def dKdiag_dX(self, dL_dKdiag, X, target):
"""Gradient of diagonal of covariance with respect to X"""

View file

@ -103,7 +103,10 @@ class POLY(Kernpart):
"""Derivative of the covariance matrix with respect to X"""
self._K_computations(X, X2)
arg = self._K_poly_arg
target += self.weight_variance*self.degree*self.variance*(((X2[None,:, :])) *(arg**(self.degree-1))[:, :, None]*dL_dK[:, :, None]).sum(1)
if X2 is None:
target += 2*self.weight_variance*self.degree*self.variance*(((X[None,:, :])) *(arg**(self.degree-1))[:, :, None]*dL_dK[:, :, None]).sum(1)
else:
target += self.weight_variance*self.degree*self.variance*(((X2[None,:, :])) *(arg**(self.degree-1))[:, :, None]*dL_dK[:, :, None]).sum(1)
def dKdiag_dX(self, dL_dKdiag, X, target):
"""Gradient of diagonal of covariance with respect to X"""

View file

@ -70,10 +70,12 @@ class RationalQuadratic(Kernpart):
def dK_dX(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to X."""
if X2 is None: X2 = X
dist2 = np.square((X-X2.T)/self.lengthscale)
dX = -self.variance*self.power * (X-X2.T)/self.lengthscale**2 * (1 + dist2/2./self.lengthscale)**(-self.power-1)
if X2 is None:
dist2 = np.square((X-X.T)/self.lengthscale)
dX = -2.*self.variance*self.power * (X-X.T)/self.lengthscale**2 * (1 + dist2/2./self.lengthscale)**(-self.power-1)
else:
dist2 = np.square((X-X2.T)/self.lengthscale)
dX = -self.variance*self.power * (X-X2.T)/self.lengthscale**2 * (1 + dist2/2./self.lengthscale)**(-self.power-1)
target += np.sum(dL_dK*dX,1)[:,np.newaxis]
def dKdiag_dX(self,dL_dKdiag,X,target):

View file

@ -138,7 +138,10 @@ class RBF(Kernpart):
def dK_dX(self, dL_dK, X, X2, target):
self._K_computations(X, X2)
_K_dist = X[:, None, :] - X2[None, :, :] # don't cache this in _K_computations because it is high memory. If this function is being called, chances are we're not in the high memory arena.
if X2 is None:
_K_dist = 2*(X[:, None, :] - X[None, :, :])
else:
_K_dist = X[:, None, :] - X2[None, :, :] # don't cache this in _K_computations because it is high memory. If this function is being called, chances are we're not in the high memory arena.
dK_dX = (-self.variance / self.lengthscale2) * np.transpose(self._K_dvar[:, :, np.newaxis] * _K_dist, (1, 0, 2))
target += np.sum(dK_dX * dL_dK.T[:, :, None], 0)

View file

@ -133,7 +133,10 @@ class RBFInv(RBF):
def dK_dX(self, dL_dK, X, X2, target):
self._K_computations(X, X2)
_K_dist = X[:, None, :] - X2[None, :, :] # don't cache this in _K_computations because it is high memory. If this function is being called, chances are we're not in the high memory arena.
if X2 is None:
_K_dist = 2*(X[:, None, :] - X[None, :, :])
else:
_K_dist = X[:, None, :] - X2[None, :, :] # don't cache this in _K_computations because it is high memory. If this function is being called, chances are we're not in the high memory arena.
dK_dX = (-self.variance * self.inv_lengthscale2) * np.transpose(self._K_dvar[:, :, np.newaxis] * _K_dist, (1, 0, 2))
target += np.sum(dK_dX * dL_dK.T[:, :, None], 0)