Merge branch 'devel' of github.com:SheffieldML/GPy into devel

Conflicts:
	GPy/examples/regression.py
	GPy/kern/constructors.py
	GPy/testing/kernel_tests.py
	GPy/util/multioutput.py
This commit is contained in:
Ricardo 2013-09-16 12:32:42 +01:00
commit 603aa18482
25 changed files with 400 additions and 72 deletions

View file

@ -173,7 +173,7 @@ class FITC(SparseGP):
def dL_dZ(self):
    """
    Gradient of the FITC bound with respect to the inducing inputs Z.

    NOTE(review): merge residue resolved here -- the duplicated
    ``2. * self.kern.dK_dX(...)`` line was the removed side of the diff;
    the un-scaled call is the added side, consistent with the same
    factor-of-two removal in SparseGP.dL_dZ and the GPLVM models in
    this commit (dK_dX now handles the symmetric contribution itself).
    """
    # Contribution from dL/dpsi1 (cross-covariance between Z and X).
    dL_dZ = self.kern.dK_dX(self._dL_dpsi1.T, self.Z, self.X)
    # Contribution from dL/dKmm (inducing-point covariance).
    dL_dZ += self.kern.dK_dX(self._dL_dKmm, X=self.Z)
    # Pre-computed direct gradient terms.
    dL_dZ += self._dpsi1_dX
    dL_dZ += self._dKmm_dX
    return dL_dZ

View file

@ -1,4 +1,4 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Copyright (c) 2012, 2013, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
@ -459,8 +459,8 @@ class Model(Parameterized):
gradient = self.objective_function_gradients(x)
numerical_gradient = (f1 - f2) / (2 * dx)
global_ratio = (f1 - f2) / (2 * np.dot(dx, gradient))
global_ratio = (f1 - f2) / (2 * np.dot(dx, np.where(gradient==0, 1e-32, gradient)))
return (np.abs(1. - global_ratio) < tolerance) or (np.abs(gradient - numerical_gradient).mean() < tolerance)
else:
# check the gradient of each parameter individually, and do some pretty printing
@ -499,7 +499,7 @@ class Model(Parameterized):
gradient = self.objective_function_gradients(x)[i]
numerical_gradient = (f1 - f2) / (2 * step)
ratio = (f1 - f2) / (2 * step * gradient)
ratio = (f1 - f2) / (2 * step * np.where(gradient==0, 1e-312, gradient))
difference = np.abs((f1 - f2) / 2 / step - gradient)
if (np.abs(1. - ratio) < tolerance) or np.abs(difference) < tolerance:

View file

@ -254,7 +254,7 @@ class SparseGP(GPBase):
"""
The derivative of the bound wrt the inducing inputs Z
"""
dL_dZ = 2.*self.kern.dK_dX(self.dL_dKmm, self.Z) # factor of two becase of vertical and horizontal 'stripes' in dKmm_dZ
dL_dZ = self.kern.dK_dX(self.dL_dKmm, self.Z)
if self.has_uncertain_inputs:
dL_dZ += self.kern.dpsi1_dZ(self.dL_dpsi1, self.Z, self.X, self.X_variance)
dL_dZ += self.kern.dpsi2_dZ(self.dL_dpsi2, self.Z, self.X, self.X_variance)

View file

@ -18,9 +18,11 @@ class transformation(object):
def gradfactor(self, f):
""" df_dx evaluated at self.f(x)=f"""
raise NotImplementedError
def initialize(self, f):
""" produce a sensible initial value for f(x)"""
raise NotImplementedError
def __str__(self):
raise NotImplementedError
@ -42,15 +44,13 @@ class logexp(transformation):
class negative_logexp(transformation):
    """
    Map an unconstrained value x onto the NEGATIVE half-line as the
    mirror image of the ``logexp`` (softplus) transform:
    f(x) = -log(1 + exp(x)).

    NOTE(review): merge residue resolved -- duplicated lines carrying
    the old inline formula comments (and the commented-out gradfactor
    body) were the removed side of the diff; the clean delegating calls
    are the added side.
    """
    domain = NEGATIVE

    def f(self, x):
        # Negated softplus: always strictly negative.
        return -logexp.f(x)

    def finv(self, f):
        # Reflect back into logexp's (positive) range before inverting.
        return logexp.finv(-f)

    def gradfactor(self, f):
        # Chain rule through the two sign flips.
        return -logexp.gradfactor(-f)

    def initialize(self, f):
        # Sensible (negative) starting value for the free parameter.
        return -logexp.initialize(f)

    def __str__(self):
        return '(-ve)'
@ -82,7 +82,6 @@ class logexp_clipped(logexp):
return '(+ve_c)'
class exponent(transformation):
# TODO: can't allow this to go to zero, need to set a lower bound. Similar with negative exponent below. See old MATLAB code.
domain = POSITIVE
def f(self, x):
return np.where(x<lim_val, np.where(x>-lim_val, np.exp(x), np.exp(-lim_val)), np.exp(lim_val))

View file

@ -81,7 +81,6 @@ def coregionalization_sparse(max_iters=100):
k1 = GPy.kern.rbf(1)
m = GPy.models.SparseGPMultioutputRegression(X_list=[X1,X2],Y_list=[Y1,Y2],kernel_list=[k1],num_inducing=20)
#m.constrain_fixed('iip')
m.constrain_bounded('noise_variance', 1e-3, 1e-1)
# m.optimize_restarts(5, robust=True, messages=1, max_iters=max_iters, optimizer='bfgs')

View file

@ -5,7 +5,6 @@ import numpy as np
from kern import kern
import parts
def rbf_inv(input_dim,variance=1., inv_lengthscale=None,ARD=False):
"""
Construct an RBF kernel
@ -103,6 +102,12 @@ def gibbs(input_dim,variance=1., mapping=None):
part = parts.gibbs.Gibbs(input_dim,variance,mapping)
return kern(input_dim, [part])
def hetero(input_dim, mapping=None, transform=None):
"""
"""
part = parts.hetero.Hetero(input_dim,mapping,transform)
return kern(input_dim, [part])
def poly(input_dim,variance=1., weight_variance=None,bias_variance=1.,degree=2, ARD=False):
"""
Construct a polynomial kernel
@ -135,6 +140,7 @@ def white(input_dim,variance=1.):
part = parts.white.White(input_dim,variance)
return kern(input_dim, [part])
def exponential(input_dim,variance=1., lengthscale=None, ARD=False):
"""
Construct an exponential kernel

View file

@ -12,7 +12,9 @@ from matplotlib.transforms import offset_copy
class kern(Parameterized):
def __init__(self, input_dim, parts=[], input_slices=None):
"""
This is the main kernel class for GPy. It handles multiple (additive) kernel functions, and keeps track of variaous things like which parameters live where.
This is the main kernel class for GPy. It handles multiple
(additive) kernel functions, and keeps track of various things
like which parameters live where.
The technical code for kernels is divided into _parts_ (see
e.g. rbf.py). This object contains a list of parts, which are
@ -33,6 +35,11 @@ class kern(Parameterized):
self.input_dim = input_dim
part_names = [k.name for k in self.parts]
self.name=''
for name in part_names:
self.name += name + '+'
self.name = self.name[:-1]
# deal with input_slices
if input_slices is None:
self.input_slices = [slice(None) for p in self.parts]
@ -333,10 +340,8 @@ class kern(Parameterized):
:type X: np.ndarray (num_samples x input_dim)
:param X2: Observed data inputs (optional, defaults to X)
:type X2: np.ndarray (num_inducing x input_dim)"""
if X2 is None:
X2 = X
target = np.zeros_like(X)
if X2 is None:
if X2 is None:
[p.dK_dX(dL_dK, X[:, i_s], None, target[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)]
else:
[p.dK_dX(dL_dK, X[:, i_s], X2[:, i_s], target[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)]
@ -653,17 +658,85 @@ def kern_test(kern, X=None, X2=None, verbose=False):
:param X2: X2 input values to test the covariance function.
:type X2: ndarray
"""
pass_checks = True
if X==None:
X = np.random.randn(10, kern.input_dim)
if X2==None:
X2 = np.random.randn(20, kern.input_dim)
result = [Kern_check_model(kern, X=X).is_positive_definite(),
Kern_check_dK_dtheta(kern, X=X, X2=X2).checkgrad(verbose=verbose),
Kern_check_dK_dtheta(kern, X=X, X2=None).checkgrad(verbose=verbose),
Kern_check_dKdiag_dtheta(kern, X=X).checkgrad(verbose=verbose),
Kern_check_dK_dX(kern, X=X, X2=X2).checkgrad(verbose=verbose),
Kern_check_dKdiag_dX(kern, X=X).checkgrad(verbose=verbose)]
# Need to check
#Kern_check_dK_dX(kern, X, X2=None).checkgrad(verbose=verbose)]
# but currently I think these aren't implemented.
return np.all(result)
if verbose:
print("Checking covariance function is positive definite.")
result = Kern_check_model(kern, X=X).is_positive_definite()
if result and verbose:
print("Check passed.")
if not result:
print("Positive definite check failed for " + kern.name + " covariance function.")
pass_checks = False
return False
if verbose:
print("Checking gradients of K(X, X) wrt theta.")
result = Kern_check_dK_dtheta(kern, X=X, X2=None).checkgrad(verbose=verbose)
if result and verbose:
print("Check passed.")
if not result:
print("Gradient of K(X, X) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:")
Kern_check_dK_dtheta(kern, X=X, X2=None).checkgrad(verbose=True)
pass_checks = False
return False
if verbose:
print("Checking gradients of K(X, X2) wrt theta.")
result = Kern_check_dK_dtheta(kern, X=X, X2=X2).checkgrad(verbose=verbose)
if result and verbose:
print("Check passed.")
if not result:
print("Gradient of K(X, X) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:")
Kern_check_dK_dtheta(kern, X=X, X2=X2).checkgrad(verbose=True)
pass_checks = False
return False
if verbose:
print("Checking gradients of Kdiag(X) wrt theta.")
result = Kern_check_dKdiag_dtheta(kern, X=X).checkgrad(verbose=verbose)
if result and verbose:
print("Check passed.")
if not result:
print("Gradient of Kdiag(X) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:")
Kern_check_dKdiag_dtheta(kern, X=X).checkgrad(verbose=True)
pass_checks = False
return False
if verbose:
print("Checking gradients of K(X, X) wrt X.")
result = Kern_check_dK_dX(kern, X=X, X2=None).checkgrad(verbose=verbose)
if result and verbose:
print("Check passed.")
if not result:
print("Gradient of K(X, X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:")
Kern_check_dK_dX(kern, X=X, X2=None).checkgrad(verbose=True)
pass_checks = False
return False
if verbose:
print("Checking gradients of K(X, X2) wrt X.")
result = Kern_check_dK_dX(kern, X=X, X2=X2).checkgrad(verbose=verbose)
if result and verbose:
print("Check passed.")
if not result:
print("Gradient of K(X, X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:")
Kern_check_dK_dX(kern, X=X, X2=X2).checkgrad(verbose=True)
pass_checks = False
return False
if verbose:
print("Checking gradients of Kdiag(X) wrt X.")
result = Kern_check_dKdiag_dX(kern, X=X).checkgrad(verbose=verbose)
if result and verbose:
print("Check passed.")
if not result:
print("Gradient of Kdiag(X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:")
Kern_check_dKdiag_dX(kern, X=X).checkgrad(verbose=True)
pass_checks = False
return False
return pass_checks

View file

@ -98,9 +98,13 @@ class Matern32(Kernpart):
def dK_dX(self, dL_dK, X, X2, target):
    """Derivative of the Matern 3/2 covariance wrt X, accumulated into target.

    NOTE(review): merge residue resolved -- the pre-branch dist/ddist_dX
    pair (guarded by the old ``if X2 is None: X2 = X``) was the removed
    side of the diff; the explicit X2-is-None branch with the factor of 2
    (symmetric K(X, X) case, X appears in both arguments) is the added side.
    """
    if X2 is None:
        # Symmetric case: each pair contributes through both arguments of K.
        dist = np.sqrt(np.sum(np.square((X[:, None, :] - X[None, :, :]) / self.lengthscale), -1))[:, :, None]
        ddist_dX = 2 * (X[:, None, :] - X[None, :, :]) / self.lengthscale ** 2 / np.where(dist != 0., dist, np.inf)
    else:
        dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1))[:, :, None]
        # np.inf in the denominator zeroes the (undefined) gradient at dist == 0.
        ddist_dX = (X[:, None, :] - X2[None, :, :]) / self.lengthscale ** 2 / np.where(dist != 0., dist, np.inf)
    dK_dX = -np.transpose(3 * self.variance * dist * np.exp(-np.sqrt(3) * dist) * ddist_dX, (1, 0, 2))
    target += np.sum(dK_dX * dL_dK.T[:, :, None], 0)

View file

@ -98,9 +98,12 @@ class Matern52(Kernpart):
def dK_dX(self,dL_dK,X,X2,target):
    """Derivative of the Matern 5/2 covariance wrt X, accumulated into target.

    NOTE(review): merge residue resolved -- kept the added branching
    version (factor of 2 for the symmetric K(X, X) case) and dropped the
    removed pre-branch computation with its ``if X2 is None: X2 = X`` guard.
    """
    if X2 is None:
        # Symmetric case: X appears in both arguments of K, hence the 2.
        dist = np.sqrt(np.sum(np.square((X[:,None,:]-X[None,:,:])/self.lengthscale),-1))[:,:,None]
        ddist_dX = 2*(X[:,None,:]-X[None,:,:])/self.lengthscale**2/np.where(dist!=0.,dist,np.inf)
    else:
        dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1))[:,:,None]
        # np.inf in the denominator zeroes the (undefined) gradient at dist == 0.
        ddist_dX = (X[:,None,:]-X2[None,:,:])/self.lengthscale**2/np.where(dist!=0.,dist,np.inf)
    dK_dX = - np.transpose(self.variance*5./3*dist*(1+np.sqrt(5)*dist)*np.exp(-np.sqrt(5)*dist)*ddist_dX,(1,0,2))
    target += np.sum(dK_dX*dL_dK.T[:,:,None],0)

View file

@ -5,6 +5,8 @@ import exponential
import finite_dimensional
import fixed
import gibbs
#import hetero #hetero.py is not commited: omitting for now. JH.
import hierarchical
import independent_outputs
import linear
import Matern32
@ -19,8 +21,7 @@ import prod
import rational_quadratic
import rbfcos
import rbf
import rbf_inv
import spline
import symmetric
import white
import hierarchical
import rbf_inv

View file

@ -9,17 +9,18 @@ from scipy import weave
class Coregionalize(Kernpart):
"""
Kernel for intrinsic/linear coregionalization models
Covariance function for intrinsic/linear coregionalization models
This kernel has the form
This covariance has the form
.. math::
\mathbf{B} = \mathbf{W}\mathbf{W}^\top + kappa \mathbf{I}
An intrinsic/linear coregionalization kernel of the form
An intrinsic/linear coregionalization covariance function of the form
.. math::
k_2(x, y)=\mathbf{B} k(x, y)
it is obtainded as the tensor product between a kernel k(x,y) and B.
it is obtained as the tensor product between a covariance function
k(x,y) and B.
:param num_outputs: number of outputs to coregionalize
:type num_outputs: int

View file

@ -9,7 +9,7 @@ import GPy
class Gibbs(Kernpart):
"""
Gibbs and MacKay non-stationary covariance function.
Gibbs non-stationary covariance function.
.. math::
@ -25,7 +25,10 @@ class Gibbs(Kernpart):
with input location. This leads to an additional term in front of
the kernel.
The parameters are :math:`\sigma^2`, the process variance, and the parameters of l(x) which is a function that can be specified by the user, by default an multi-layer peceptron is used is used.
The parameters are :math:`\sigma^2`, the process variance, and
the parameters of l(x), which is a function that can be
specified by the user; by default a multi-layer perceptron is
used.
:param input_dim: the number of input dimensions
:type input_dim: int
@ -37,6 +40,15 @@ class Gibbs(Kernpart):
:type ARD: Boolean
:rtype: Kernpart object
See Mark Gibbs's thesis for more details: Gibbs,
M. N. (1997). Bayesian Gaussian Processes for Regression and
Classification. PhD thesis, Department of Physics, University of
Cambridge. Or also see Page 93 of Gaussian Processes for Machine
Learning by Rasmussen and Williams. Although note that we do not
constrain the lengthscale to be positive by default. This allows
anticorrelation to occur. The positive constraint can be included
by the user manually.
"""
def __init__(self, input_dim, variance=1., mapping=None, ARD=False):
@ -89,12 +101,18 @@ class Gibbs(Kernpart):
"""Derivative of the covariance matrix with respect to X."""
# First account for gradients arising from presence of X in exponent.
self._K_computations(X, X2)
_K_dist = X[:, None, :] - X2[None, :, :]
if X2 is None:
_K_dist = 2*(X[:, None, :] - X[None, :, :])
else:
_K_dist = X[:, None, :] - X2[None, :, :] # don't cache this in _K_co
dK_dX = (-2.*self.variance)*np.transpose((self._K_dvar/self._w2)[:, :, None]*_K_dist, (1, 0, 2))
target += np.sum(dK_dX*dL_dK.T[:, :, None], 0)
# Now account for gradients arising from presence of X in lengthscale.
self._dK_computations(dL_dK)
target += self.mapping.df_dX(self._dL_dl[:, None], X)
if X2 is None:
target += 2.*self.mapping.df_dX(self._dL_dl[:, None], X)
else:
target += self.mapping.df_dX(self._dL_dl[:, None], X)
def dKdiag_dX(self, dL_dKdiag, X, target):
"""Gradient of diagonal of covariance with respect to X."""
@ -102,7 +120,8 @@ class Gibbs(Kernpart):
def dKdiag_dtheta(self, dL_dKdiag, X, target):
    """Gradient of diagonal of covariance with respect to parameters.

    NOTE(review): merge residue resolved -- the old ``pass`` body was the
    removed side of the diff; the variance gradient is the added side.
    target[0] is presumably the variance slot (the diagonal of this
    kernel appears not to depend on the other parameters) -- confirm
    parameter ordering against _get_param_names.
    """
    target[0] += np.sum(dL_dKdiag)
def _K_computations(self, X, X2=None):

101
GPy/kern/parts/hetero.py Normal file
View file

@ -0,0 +1,101 @@
# Copyright (c) 2013, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from IPython.core.debugger import Tracer; debug_here=Tracer()
from kernpart import Kernpart
import numpy as np
from ...util.linalg import tdot
from ...core.mapping import Mapping
import GPy
class Hetero(Kernpart):
    """
    Heteroscedastic noise which depends on input location. See, for
    example, the paper by Goldberg et al. referenced below.

    .. math::

       k(x_i, x_j) = \delta_{i,j} \sigma^2(x_i)

    where :math:`\sigma^2(x)` is a function giving the variance as a
    function of input space and :math:`\delta_{i,j}` is the Kronecker
    delta function.

    The parameters are the parameters of \sigma^2(x), which is a
    function that can be specified by the user; by default a
    multi-layer perceptron is used.

    TODO: Need to constrain the function outputs positive (the
    ``transform`` argument is stored but never applied below); currently
    the mapping output is simply squared.

    :param input_dim: the number of input dimensions
    :type input_dim: int
    :param mapping: the mapping that gives the variance across the input space (by default GPy.mappings.MLP is used with 20 hidden nodes).
    :type mapping: GPy.core.Mapping
    :rtype: Kernpart object

    See: Goldberg, P. W., Williams, C. K. I. and Bishop, C. M. (1998)
    "Regression with Input-dependent Noise: a Gaussian Process
    Treatment", Advances in Neural Information Processing Systems,
    Volume 10, pp. 493-499. MIT Press, for a Gaussian process treatment
    of this problem.
    """

    def __init__(self, input_dim, mapping=None, transform=None):
        self.input_dim = input_dim
        if not mapping:
            mapping = GPy.mappings.MLP(output_dim=1, hidden_dim=20, input_dim=input_dim)
        if not transform:
            transform = GPy.core.transformations.logexp()
        self.transform = transform
        self.mapping = mapping
        self.name = 'hetero'
        self.num_params = self.mapping.num_params
        self._set_params(self.mapping._get_params())

    def _get_params(self):
        # All parameters live in the underlying mapping.
        return self.mapping._get_params()

    def _set_params(self, x):
        assert x.size == (self.num_params)
        self.mapping._set_params(x)

    def _get_param_names(self):
        return self.mapping._get_param_names()

    def K(self, X, X2, target):
        """Add the (diagonal-only) covariance between X and X2 to target."""
        # FIX: was ``X2 == None`` -- with a numpy array that triggers an
        # elementwise comparison instead of an identity test.
        if X2 is None or X2 is X:
            target[np.diag_indices_from(target)] += self._Kdiag(X)

    def Kdiag(self, X, target):
        """Compute the diagonal of the covariance matrix for X."""
        target += self._Kdiag(X)

    def _Kdiag(self, X):
        """Helper function for computing the diagonal elements of the covariance."""
        # Squaring keeps the variance non-negative (see TODO above).
        return self.mapping.f(X).flatten() ** 2

    def dK_dtheta(self, dL_dK, X, X2, target):
        """Derivative of the covariance with respect to the parameters."""
        if X2 is None or X2 is X:
            # Only the diagonal is non-zero; extract dL/dKdiag from dL_dK.
            dL_dKdiag = dL_dK.flat[::dL_dK.shape[0] + 1]
            self.dKdiag_dtheta(dL_dKdiag, X, target)

    def dKdiag_dtheta(self, dL_dKdiag, X, target):
        """Gradient of diagonal of covariance with respect to parameters."""
        # d(f^2)/dtheta = 2 f df/dtheta.
        target += 2. * self.mapping.df_dtheta(dL_dKdiag[:, None], X) * self.mapping.f(X)

    def dK_dX(self, dL_dK, X, X2, target):
        """Derivative of the covariance matrix with respect to X."""
        if X2 is None or X2 is X:
            dL_dKdiag = dL_dK.flat[::dL_dK.shape[0] + 1]
            self.dKdiag_dX(dL_dKdiag, X, target)

    def dKdiag_dX(self, dL_dKdiag, X, target):
        """Gradient of diagonal of covariance with respect to X."""
        # d(f^2)/dX = 2 f df/dX.
        target += 2. * self.mapping.df_dX(dL_dKdiag[:, None], X) * self.mapping.f(X)

View file

@ -59,6 +59,45 @@ class Kernpart(object):
def dK_dX(self, dL_dK, X, X2, target):
raise NotImplementedError
class Kernpart_stationary(Kernpart):
    """
    Base class for stationary covariance parts.

    Handles the lengthscale parameter(s): a single shared lengthscale,
    or one lengthscale per input dimension when ARD is True.

    :param input_dim: the number of input dimensions
    :type input_dim: int
    :param lengthscale: lengthscale value(s); size 1, or input_dim when ARD
    :param ARD: use one lengthscale per input dimension
    :type ARD: bool
    """
    def __init__(self, input_dim, lengthscale=None, ARD=False):
        self.input_dim = input_dim
        self.ARD = ARD
        if not ARD:
            # NOTE(review): num_params = 2 although only one lengthscale is
            # stored here -- presumably the extra parameter (variance?) is
            # owned by the subclass; confirm against concrete subclasses.
            self.num_params = 2
            if lengthscale is not None:
                self.lengthscale = np.asarray(lengthscale)
                assert self.lengthscale.size == 1, "Only one lengthscale needed for non-ARD kernel"
            else:
                # Default unit lengthscale.
                self.lengthscale = np.ones(1)
        else:
            self.num_params = self.input_dim + 1
            if lengthscale is not None:
                self.lengthscale = np.asarray(lengthscale)
                assert self.lengthscale.size == self.input_dim, "bad number of lengthscales"
            else:
                self.lengthscale = np.ones(self.input_dim)
        # initialize cache
        self._Z, self._mu, self._S = np.empty(shape=(3, 1))
        self._X, self._X2, self._params = np.empty(shape=(3, 1))

    def _set_params(self, x):
        # NOTE(review): assigns the whole incoming parameter vector to
        # lengthscale; if num_params > lengthscale.size this looks
        # inconsistent with __init__ -- verify that subclasses slice x
        # before delegating here.
        self.lengthscale = x
        self.lengthscale2 = np.square(self.lengthscale)
        # reset cached results
        self._X, self._X2, self._params = np.empty(shape=(3, 1))
        self._Z, self._mu, self._S = np.empty(shape=(3, 1)) # cached versions of Z,mu,S

    def dKdiag_dtheta(self, dL_dKdiag, X, target):
        # For stationary covariances, derivative of diagonal elements
        # wrt lengthscale is 0.
        target[0] += np.sum(dL_dKdiag)
class Kernpart_inner(Kernpart):
def __init__(self,input_dim):
"""
@ -74,3 +113,5 @@ class Kernpart_inner(Kernpart):
# initialize cache
self._Z, self._mu, self._S = np.empty(shape=(3, 1))
self._X, self._X2, self._params = np.empty(shape=(3, 1))

View file

@ -99,7 +99,10 @@ class Linear(Kernpart):
target += tmp.sum()
def dK_dX(self, dL_dK, X, X2, target):
    """Derivative of the linear covariance wrt X, accumulated into target.

    NOTE(review): merge residue resolved -- the unconditional X2 line was
    the removed side of the diff; the X2-is-None branch (factor of 2 for
    the symmetric K(X, X) case) is the added side.
    """
    if X2 is None:
        # Symmetric case: X appears on both sides of K, hence the 2.
        target += 2 * (((X[None, :, :] * self.variances)) * dL_dK[:, :, None]).sum(1)
    else:
        target += (((X2[None, :, :] * self.variances)) * dL_dK[:, :, None]).sum(1)
def dKdiag_dX(self,dL_dKdiag,X,target):
    """Gradient of the diagonal of the linear covariance wrt X."""
    # Kdiag(x) involves variances * x^2, so d/dx gives 2 * variances * x,
    # weighted elementwise by the incoming dL/dKdiag.
    weighted = dL_dKdiag[:, None] * X
    target += 2. * self.variances * weighted

View file

@ -110,9 +110,13 @@ class MLP(Kernpart):
arg = self._K_asin_arg
numer = self._K_numer
denom = self._K_denom
vec2 = (X2*X2).sum(1)*self.weight_variance + self.bias_variance + 1.
denom3 = denom*denom*denom
target += four_over_tau*self.weight_variance*self.variance*((X2[None, :, :]/denom[:, :, None] - vec2[None, :, None]*X[:, None, :]*(numer/denom3)[:, :, None])*(dL_dK/np.sqrt(1-arg*arg))[:, :, None]).sum(1)
if X2 is not None:
vec2 = (X2*X2).sum(1)*self.weight_variance+self.bias_variance + 1.
target += four_over_tau*self.weight_variance*self.variance*((X2[None, :, :]/denom[:, :, None] - vec2[None, :, None]*X[:, None, :]*(numer/denom3)[:, :, None])*(dL_dK/np.sqrt(1-arg*arg))[:, :, None]).sum(1)
else:
vec = (X*X).sum(1)*self.weight_variance+self.bias_variance + 1.
target += 2*four_over_tau*self.weight_variance*self.variance*((X[None, :, :]/denom[:, :, None] - vec[None, :, None]*X[:, None, :]*(numer/denom3)[:, :, None])*(dL_dK/np.sqrt(1-arg*arg))[:, :, None]).sum(1)
def dKdiag_dX(self, dL_dKdiag, X, target):
"""Gradient of diagonal of covariance with respect to X"""

View file

@ -103,7 +103,10 @@ class POLY(Kernpart):
"""Derivative of the covariance matrix with respect to X"""
self._K_computations(X, X2)
arg = self._K_poly_arg
target += self.weight_variance*self.degree*self.variance*(((X2[None,:, :])) *(arg**(self.degree-1))[:, :, None]*dL_dK[:, :, None]).sum(1)
if X2 is None:
target += 2*self.weight_variance*self.degree*self.variance*(((X[None,:, :])) *(arg**(self.degree-1))[:, :, None]*dL_dK[:, :, None]).sum(1)
else:
target += self.weight_variance*self.degree*self.variance*(((X2[None,:, :])) *(arg**(self.degree-1))[:, :, None]*dL_dK[:, :, None]).sum(1)
def dKdiag_dX(self, dL_dKdiag, X, target):
"""Gradient of diagonal of covariance with respect to X"""

View file

@ -70,10 +70,12 @@ class RationalQuadratic(Kernpart):
def dK_dX(self,dL_dK,X,X2,target):
    """Derivative of the rational quadratic covariance wrt X.

    NOTE(review): merge residue resolved -- kept the added branching
    version (factor of 2 for the symmetric K(X, X) case) and dropped
    the removed pre-branch computation.
    NOTE(review): ``dist2/2./self.lengthscale`` looks like it should be
    lengthscale**2 given dist2 is already scaled -- but it appears on
    both sides of the merge, so it is left untouched here; verify
    against the K() implementation.
    """
    if X2 is None:
        dist2 = np.square((X-X.T)/self.lengthscale)
        dX = -2.*self.variance*self.power * (X-X.T)/self.lengthscale**2 * (1 + dist2/2./self.lengthscale)**(-self.power-1)
    else:
        dist2 = np.square((X-X2.T)/self.lengthscale)
        dX = -self.variance*self.power * (X-X2.T)/self.lengthscale**2 * (1 + dist2/2./self.lengthscale)**(-self.power-1)
    target += np.sum(dL_dK*dX,1)[:,np.newaxis]
def dKdiag_dX(self,dL_dKdiag,X,target):

View file

@ -138,7 +138,10 @@ class RBF(Kernpart):
def dK_dX(self, dL_dK, X, X2, target):
    """Derivative of the RBF covariance wrt X, accumulated into target.

    NOTE(review): merge residue resolved -- the unconditional _K_dist
    line was the removed side of the diff; the X2-is-None branch (factor
    of 2 for the symmetric K(X, X) case) is the added side.
    """
    self._K_computations(X, X2)
    if X2 is None:
        # Symmetric case: X appears in both arguments of K, hence the 2.
        _K_dist = 2 * (X[:, None, :] - X[None, :, :])
    else:
        # don't cache this in _K_computations because it is high memory.
        # If this function is being called, chances are we're not in the
        # high memory arena.
        _K_dist = X[:, None, :] - X2[None, :, :]
    dK_dX = (-self.variance / self.lengthscale2) * np.transpose(self._K_dvar[:, :, np.newaxis] * _K_dist, (1, 0, 2))
    target += np.sum(dK_dX * dL_dK.T[:, :, None], 0)

View file

@ -133,7 +133,10 @@ class RBFInv(RBF):
def dK_dX(self, dL_dK, X, X2, target):
    """Derivative of the (inverse-lengthscale) RBF covariance wrt X.

    NOTE(review): merge residue resolved -- the unconditional _K_dist
    line was the removed side of the diff; the X2-is-None branch (factor
    of 2 for the symmetric K(X, X) case) is the added side.
    """
    self._K_computations(X, X2)
    if X2 is None:
        # Symmetric case: X appears in both arguments of K, hence the 2.
        _K_dist = 2 * (X[:, None, :] - X[None, :, :])
    else:
        # don't cache this in _K_computations because it is high memory.
        # If this function is being called, chances are we're not in the
        # high memory arena.
        _K_dist = X[:, None, :] - X2[None, :, :]
    dK_dX = (-self.variance * self.inv_lengthscale2) * np.transpose(self._K_dvar[:, :, np.newaxis] * _K_dist, (1, 0, 2))
    target += np.sum(dK_dX * dL_dK.T[:, :, None], 0)

View file

@ -44,7 +44,7 @@ class BCGPLVM(GPLVM):
GP._set_params(self, x[self.mapping.num_params:])
def _log_likelihood_gradients(self):
dL_df = 2.*self.kern.dK_dX(self.dL_dK, self.X)
dL_df = self.kern.dK_dX(self.dL_dK, self.X)
dL_dtheta = self.mapping.df_dtheta(dL_df, self.likelihood.Y)
return np.hstack((dL_dtheta.flatten(), GP._log_likelihood_gradients(self)))

View file

@ -61,7 +61,7 @@ class GPLVM(GP):
GP._set_params(self, x[self.X.size:])
def _log_likelihood_gradients(self):
dL_dX = 2.*self.kern.dK_dX(self.dL_dK, self.X)
dL_dX = self.kern.dK_dX(self.dL_dK, self.X)
return np.hstack((dL_dX.flatten(), GP._log_likelihood_gradients(self)))

View file

@ -38,7 +38,7 @@ In sparse GPs wouldn't it be clearer to call Z inducing?
In the coregionalisation matrix, setting the W to all ones will (surely?) ensure that symmetry isn't broken. Also, by allowing it to scale like that, the output variance increases as rank is increased (and if the user sets rank to more than the output dim they could get very different results).
We are inconsistent about our use of ise and ize e.g. optimize and normalize_X, but coregionalise, we should choose one and stick to it. Suggest -ize.
We are inconsistent about our use of ise and ize e.g. optimize and normalize_X, but coregionalise, we should choose one and stick to it. Suggest -ize. Neil- I'm imposing the US spellings to keep things consistent, so -ize it is.
Exceptions: we need to provide a list of exceptions we throw and specify what is thrown where.

View file

@ -5,6 +5,7 @@ import unittest
import numpy as np
import GPy
verbose = False
class KernelTests(unittest.TestCase):
def test_kerneltie(self):
@ -17,25 +18,54 @@ class KernelTests(unittest.TestCase):
self.assertTrue(m.checkgrad())
def test_rbfkernel(self):
verbose = False
kern = GPy.kern.rbf(5)
self.assertTrue(GPy.kern.Kern_check_model(kern).is_positive_definite())
self.assertTrue(GPy.kern.Kern_check_dK_dtheta(kern).checkgrad(verbose=verbose))
self.assertTrue(GPy.kern.Kern_check_dKdiag_dtheta(kern).checkgrad(verbose=verbose))
self.assertTrue(GPy.kern.Kern_check_dK_dX(kern).checkgrad(verbose=verbose))
self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
def test_rbf_invkernel(self):
kern = GPy.kern.rbf_inv(5)
self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
def test_Matern32kernel(self):
kern = GPy.kern.Matern32(5)
self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
def test_Matern52kernel(self):
kern = GPy.kern.Matern52(5)
self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
def test_linearkernel(self):
kern = GPy.kern.linear(5)
self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
def test_periodic_exponentialkernel(self):
kern = GPy.kern.periodic_exponential(1)
self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
def test_periodic_Matern32kernel(self):
kern = GPy.kern.periodic_Matern32(1)
self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
def test_periodic_Matern52kernel(self):
kern = GPy.kern.periodic_Matern52(1)
self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
def test_rational_quadratickernel(self):
kern = GPy.kern.rational_quadratic(1)
self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
def test_gibbskernel(self):
verbose = False
kern = GPy.kern.gibbs(5, mapping=GPy.mappings.Linear(5, 1))
self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
def test_heterokernel(self):
kern = GPy.kern.hetero(5, mapping=GPy.mappings.Linear(5, 1), transform=GPy.core.transformations.logexp())
self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
def test_mlpkernel(self):
verbose = False
kern = GPy.kern.mlp(5)
self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
def test_polykernel(self):
verbose = False
kern = GPy.kern.poly(5, degree=4)
self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
@ -46,9 +76,8 @@ class KernelTests(unittest.TestCase):
X = np.random.rand(30, 4)
K = np.dot(X, X.T)
kernel = GPy.kern.fixed(4, K)
Y = np.ones((30,1))
m = GPy.models.GPRegression(X,Y,kernel=kernel)
self.assertTrue(m.checkgrad())
kern = GPy.kern.poly(5, degree=4)
self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
def test_coregionalization(self):
X1 = np.random.rand(50,1)*8
@ -61,9 +90,8 @@ class KernelTests(unittest.TestCase):
k1 = GPy.kern.rbf(1) + GPy.kern.bias(1)
k2 = GPy.kern.coregionalize(2,1)
k = k1.prod(k2,tensor=True)
m = GPy.models.GPRegression(X,Y,kernel=k)
self.assertTrue(m.checkgrad())
kern = k1**k2
self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
if __name__ == "__main__":

35
GPy/util/multioutput.py Normal file
View file

@ -0,0 +1,35 @@
import numpy as np
import warnings
from .. import kern
def build_lcm(input_dim, num_outputs, CK = [], NC = [], W_columns=1,W=None,kappa=None):
    #TODO build_icm or build_lcm
    """
    Builds a kernel for a linear coregionalization model.

    :param input_dim: Input dimensionality
    :param num_outputs: Number of outputs
    :param CK: List of coregionalized kernels (i.e., each will be multiplied by a coregionalize kernel). Must contain at least one kernel.
    :param NC: List of kernels that will be added up together with CK, but won't be multiplied by a coregionalize kernel
    :param W_columns: number of columns of the coregionalization parameter 'coregion_W'
    :type W_columns: integer
    """
    # FIX: explicit error instead of an opaque IndexError on CK[0] below.
    if not CK:
        raise ValueError("CK must contain at least one kernel")
    # FIX: the Python-2-only `<>` operator replaced by `!=` throughout.
    for k in CK:
        # Coregionalized kernels act on the data dimensions only.
        if k.input_dim != input_dim:
            k.input_dim = input_dim
            warnings.warn("kernel's input dimension overwritten to fit input_dim parameter.")
    for k in NC:
        # Non-coregionalized kernels see an extra (output-index) column.
        if k.input_dim != input_dim + 1:
            k.input_dim = input_dim + 1
            warnings.warn("kernel's input dimension overwritten to fit input_dim parameter.")
    kernel = CK[0].prod(kern.coregionalize(num_outputs,W_columns,W,kappa),tensor=True)
    for k in CK[1:]:
        # Each remaining kernel gets its own coregionalization matrix.
        k_coreg = kern.coregionalize(num_outputs,W_columns,W,kappa)
        kernel += k.prod(k_coreg,tensor=True)
    for k in NC:
        kernel += k
    return kernel