Mirror of https://github.com/SheffieldML/GPy.git, synced 2026-05-03 00:32:39 +02:00

Merging changed files.
This commit is contained in: 4154a4afb6
40 changed files with 555 additions and 415 deletions
@@ -17,6 +17,7 @@ def rbf_inv(input_dim,variance=1., inv_lengthscale=None,ARD=False):
    :type lengthscale: float
    :param ARD: Auto Relevance Determination (one lengthscale per dimension)
    :type ARD: Boolean

    """
    part = parts.rbf_inv.RBFInv(input_dim,variance,inv_lengthscale,ARD)
    return kern(input_dim, [part])

@@ -33,6 +34,7 @@ def rbf(input_dim,variance=1., lengthscale=None,ARD=False):
    :type lengthscale: float
    :param ARD: Auto Relevance Determination (one lengthscale per dimension)
    :type ARD: Boolean

    """
    part = parts.rbf.RBF(input_dim,variance,lengthscale,ARD)
    return kern(input_dim, [part])
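For orientation, a minimal usage sketch of these constructors (the `GPy.kern` namespace and the `K()` call follow the code shown in this diff; the hyperparameter values are illustrative):

    import numpy as np
    import GPy

    # isotropic RBF on 2-D inputs: k(x, y) = variance * exp(-0.5 * ||x - y||^2 / lengthscale^2)
    k = GPy.kern.rbf(input_dim=2, variance=1.0, lengthscale=0.5)

    # ARD variant: one lengthscale per input dimension
    k_ard = GPy.kern.rbf(input_dim=2, variance=1.0, lengthscale=np.array([0.5, 2.0]), ARD=True)

    X = np.random.randn(10, 2)
    K = k.K(X)  # 10 x 10 covariance matrix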
@@ -41,11 +43,13 @@ def linear(input_dim,variances=None,ARD=False):
    """
    Construct a linear kernel.

    Arguments
    ---------
    input_dimD (int), obligatory
    variances (np.ndarray)
    ARD (boolean)
    :param input_dim: dimensionality of the kernel, obligatory
    :type input_dim: int
    :param variances:
    :type variances: np.ndarray
    :param ARD: Auto Relevance Determination (one lengthscale per dimension)
    :type ARD: Boolean

    """
    part = parts.linear.Linear(input_dim,variances,ARD)
    return kern(input_dim, [part])
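The linear part reduces to a weighted inner product; a minimal numpy sketch of what it computes (hypothetical helper name, for illustration):

    import numpy as np

    def linear_K(X, X2, variances):
        # k(x, y) = sum_i variances_i * x_i * y_i, i.e. X diag(variances) X2^T
        return (X * variances) @ X2.T

    X = np.random.randn(5, 3)
    K = linear_K(X, X, np.array([1.0, 0.5, 2.0]))  # ARD: one variance per dimension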
@@ -64,39 +68,42 @@ def mlp(input_dim,variance=1., weight_variance=None,bias_variance=100.,ARD=False
    :type bias_variance: float
    :param ARD: Auto Relevance Determination (allows for ARD version of covariance)
    :type ARD: Boolean

    """
    part = parts.mlp.MLP(input_dim,variance,weight_variance,bias_variance,ARD)
    return kern(input_dim, [part])

def gibbs(input_dim,variance=1., mapping=None):
    """
    Gibbs and MacKay non-stationary covariance function.

    .. math::

        r = sqrt((x_i - x_j)'*(x_i - x_j))
        r = \\sqrt{((x_i - x_j)'*(x_i - x_j))}

        k(x_i, x_j) = \sigma^2*Z*exp(-r^2/(l(x)*l(x) + l(x')*l(x')))
        k(x_i, x_j) = \\sigma^2*Z*exp(-r^2/(l(x)*l(x) + l(x')*l(x')))

        Z = \sqrt{2*l(x)*l(x')/(l(x)*l(x) + l(x')*l(x')}
        Z = \\sqrt{2*l(x)*l(x')/(l(x)*l(x) + l(x')*l(x')}

    where :math:`l(x)` is a function giving the length scale as a function of space.
    This is the non stationary kernel proposed by Mark Gibbs in his 1997
    thesis. It is similar to an RBF but has a length scale that varies
    with input location. This leads to an additional term in front of
    the kernel.
    Where :math:`l(x)` is a function giving the length scale as a function of space.

    The parameters are :math:`\sigma^2`, the process variance, and the parameters of l(x) which is a function that can be specified by the user, by default an multi-layer peceptron is used is used.
    This is the non stationary kernel proposed by Mark Gibbs in his 1997
    thesis. It is similar to an RBF but has a length scale that varies
    with input location. This leads to an additional term in front of
    the kernel.

    :param input_dim: the number of input dimensions
    :type input_dim: int
    :param variance: the variance :math:`\sigma^2`
    :type variance: float
    :param mapping: the mapping that gives the lengthscale across the input space.
    :type mapping: GPy.core.Mapping
    :param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one weight variance parameter \sigma^2_w), otherwise there is one weight variance parameter per dimension.
    :type ARD: Boolean
    :rtype: Kernpart object
    The parameters are :math:`\\sigma^2`, the process variance, and the parameters of l(x), which is a function that can be specified by the user; by default a multi-layer perceptron is used.

    :param input_dim: the number of input dimensions
    :type input_dim: int
    :param variance: the variance :math:`\\sigma^2`
    :type variance: float
    :param mapping: the mapping that gives the lengthscale across the input space.
    :type mapping: GPy.core.Mapping
    :param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (i.e. one weight variance parameter :math:`\\sigma^2_w`), otherwise there is one weight variance parameter per dimension.
    :type ARD: Boolean
    :rtype: Kernpart object

    """
    part = parts.gibbs.Gibbs(input_dim,variance,mapping)
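For concreteness, a plain numpy sketch of the Gibbs covariance in one dimension, matching the formula above (`lengthscale_fn` is an illustrative stand-in for the mapping l(x)):

    import numpy as np

    def gibbs_K(x, y, variance, lengthscale_fn):
        # input-dependent lengthscales l(x), l(y)
        lx, ly = lengthscale_fn(x), lengthscale_fn(y)
        denom = lx**2 + ly**2
        Z = np.sqrt(2.0 * lx * ly / denom)  # the extra term in front of the RBF-like part
        return variance * Z * np.exp(-(x - y)**2 / denom)

    # e.g. a lengthscale that grows with |x|
    k01 = gibbs_K(0.0, 1.0, variance=1.0, lengthscale_fn=lambda x: 0.5 + 0.1 * abs(x))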
@@ -124,6 +131,7 @@ def poly(input_dim,variance=1., weight_variance=None,bias_variance=1.,degree=2,
    :type degree: int
    :param ARD: Auto Relevance Determination (allows for ARD version of covariance)
    :type ARD: Boolean

    """
    part = parts.poly.POLY(input_dim,variance,weight_variance,bias_variance,degree,ARD)
    return kern(input_dim, [part])

@@ -132,10 +140,11 @@ def white(input_dim,variance=1.):
    """
    Construct a white kernel.

    Arguments
    ---------
    input_dimD (int), obligatory
    variance (float)
    :param input_dim: dimensionality of the kernel, obligatory
    :type input_dim: int
    :param variance: the variance of the kernel
    :type variance: float

    """
    part = parts.white.White(input_dim,variance)
    return kern(input_dim, [part])

@@ -180,6 +189,7 @@ def exponential(input_dim,variance=1., lengthscale=None, ARD=False):
    :type lengthscale: float
    :param ARD: Auto Relevance Determination (one lengthscale per dimension)
    :type ARD: Boolean

    """
    part = parts.exponential.Exponential(input_dim,variance, lengthscale, ARD)
    return kern(input_dim, [part])

@@ -196,6 +206,7 @@ def Matern32(input_dim,variance=1., lengthscale=None, ARD=False):
    :type lengthscale: float
    :param ARD: Auto Relevance Determination (one lengthscale per dimension)
    :type ARD: Boolean

    """
    part = parts.Matern32.Matern32(input_dim,variance, lengthscale, ARD)
    return kern(input_dim, [part])

@@ -212,6 +223,7 @@ def Matern52(input_dim, variance=1., lengthscale=None, ARD=False):
    :type lengthscale: float
    :param ARD: Auto Relevance Determination (one lengthscale per dimension)
    :type ARD: Boolean

    """
    part = parts.Matern52.Matern52(input_dim, variance, lengthscale, ARD)
    return kern(input_dim, [part])
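For reference, the standard isotropic Matérn 3/2 and 5/2 forms these parts implement, written in plain numpy (illustrative helper names; `r` is the Euclidean distance between inputs):

    import numpy as np

    def matern32_K(r, variance, lengthscale):
        # k(r) = sigma^2 * (1 + sqrt(3) r / l) * exp(-sqrt(3) r / l)
        a = np.sqrt(3.0) * r / lengthscale
        return variance * (1.0 + a) * np.exp(-a)

    def matern52_K(r, variance, lengthscale):
        # k(r) = sigma^2 * (1 + sqrt(5) r / l + 5 r^2 / (3 l^2)) * exp(-sqrt(5) r / l)
        a = np.sqrt(5.0) * r / lengthscale
        return variance * (1.0 + a + a**2 / 3.0) * np.exp(-a)

    r = np.linspace(0.0, 3.0, 7)
    K32, K52 = matern32_K(r, 1.0, 1.0), matern52_K(r, 1.0, 1.0)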
@@ -220,10 +232,11 @@ def bias(input_dim, variance=1.):
    """
    Construct a bias kernel.

    Arguments
    ---------
    input_dim (int), obligatory
    variance (float)
    :param input_dim: dimensionality of the kernel, obligatory
    :type input_dim: int
    :param variance: the variance of the kernel
    :type variance: float

    """
    part = parts.bias.Bias(input_dim, variance)
    return kern(input_dim, [part])

@@ -231,10 +244,15 @@ def bias(input_dim, variance=1.):
def finite_dimensional(input_dim, F, G, variances=1., weights=None):
    """
    Construct a finite dimensional kernel.
    input_dim: int - the number of input dimensions
    F: np.array of functions with shape (n,) - the n basis functions
    G: np.array with shape (n,n) - the Gram matrix associated to F
    variances : np.ndarray with shape (n,)

    :param input_dim: the number of input dimensions
    :type input_dim: int
    :param F: np.array of functions with shape (n,) - the n basis functions
    :type F: np.array
    :param G: np.array with shape (n,n) - the Gram matrix associated to F
    :type G: np.array
    :param variances: np.ndarray with shape (n,)
    :type variances: np.ndarray
    """
    part = parts.finite_dimensional.FiniteDimensional(input_dim, F, G, variances, weights)
    return kern(input_dim, [part])

@@ -247,6 +265,7 @@ def spline(input_dim, variance=1.):
    :type input_dim: int
    :param variance: the variance of the kernel
    :type variance: float

    """
    part = parts.spline.Spline(input_dim, variance)
    return kern(input_dim, [part])

@@ -259,6 +278,7 @@ def Brownian(input_dim, variance=1.):
    :type input_dim: int
    :param variance: the variance of the kernel
    :type variance: float

    """
    part = parts.Brownian.Brownian(input_dim, variance)
    return kern(input_dim, [part])

@@ -312,6 +332,7 @@ def periodic_exponential(input_dim=1, variance=1., lengthscale=None, period=2 *
    :type period: float
    :param n_freq: the number of frequencies considered for the periodic subspace
    :type n_freq: int

    """
    part = parts.periodic_exponential.PeriodicExponential(input_dim, variance, lengthscale, period, n_freq, lower, upper)
    return kern(input_dim, [part])

@@ -330,6 +351,7 @@ def periodic_Matern32(input_dim, variance=1., lengthscale=None, period=2 * np.pi
    :type period: float
    :param n_freq: the number of frequencies considered for the periodic subspace
    :type n_freq: int

    """
    part = parts.periodic_Matern32.PeriodicMatern32(input_dim, variance, lengthscale, period, n_freq, lower, upper)
    return kern(input_dim, [part])

@@ -348,6 +370,7 @@ def periodic_Matern52(input_dim, variance=1., lengthscale=None, period=2 * np.pi
    :type period: float
    :param n_freq: the number of frequencies considered for the periodic subspace
    :type n_freq: int

    """
    part = parts.periodic_Matern52.PeriodicMatern52(input_dim, variance, lengthscale, period, n_freq, lower, upper)
    return kern(input_dim, [part])

@@ -361,6 +384,7 @@ def prod(k1,k2,tensor=False):
    :param tensor: the kernels are either multiplied as functions defined on the same input space (default) or on the product of the input spaces
    :type tensor: Boolean
    :rtype: kernel object

    """
    part = parts.prod.Prod(k1, k2, tensor)
    return kern(part.input_dim, [part])
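A short usage sketch for kernel products, following the constructor signatures in this diff (the kernel choices are illustrative):

    import GPy

    k1 = GPy.kern.rbf(input_dim=2)
    k2 = GPy.kern.linear(input_dim=2)

    # same input space: k(x, y) = k1(x, y) * k2(x, y), still a 2-D kernel
    k_same = GPy.kern.prod(k1, k2)

    # tensor product: k((x, x'), (y, y')) = k1(x, y) * k2(x', y'), a 4-D kernel
    k_tensor = GPy.kern.prod(k1, k2, tensor=True)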
@@ -376,10 +400,12 @@ def symmetric(k):
def coregionalize(output_dim,rank=1, W=None, kappa=None):
    """
    Coregionalization matrix B, of the form:

    .. math::
        \mathbf{B} = \mathbf{W}\mathbf{W}^\top + kappa \mathbf{I}

    An intrinsic/linear coregionalization kernel of the form
    An intrinsic/linear coregionalization kernel of the form:

    .. math::
        k_2(x, y)=\mathbf{B} k(x, y)
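The coregionalization matrix is easy to form directly; a numpy sketch with illustrative shapes (rank-1 case):

    import numpy as np

    output_dim, rank = 3, 1
    W = np.random.randn(output_dim, rank)  # low-rank factor coupling the outputs
    kappa = 0.5 * np.ones(output_dim)      # per-output independent variance

    B = W @ W.T + np.diag(kappa)           # B = W W^T + diag(kappa), positive semi-definite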
@@ -449,7 +475,7 @@ def independent_outputs(k):

def hierarchical(k):
    """
    TODO THis can't be right! Construct a kernel with independent outputs from an existing kernel
    TODO This can't be right! Construct a kernel with independent outputs from an existing kernel
    """
    # for sl in k.input_slices:
    #     assert (sl.start is None) and (sl.stop is None), "cannot adjust input slices! (TODO)"

@@ -467,7 +493,8 @@ def build_lcm(input_dim, output_dim, kernel_list = [], rank=1,W=None,kappa=None)
    :param rank: number of tuples of the coregionalization parameters 'coregion_W'
    :type rank: integer

    ..Note the kernels dimensionality is overwritten to fit input_dim
    ..note the kernels' dimensionality is overwritten to fit input_dim

    """
    for k in kernel_list:

@@ -78,13 +78,15 @@ class kern(Parameterized):

    def plot_ARD(self, fignum=None, ax=None, title='', legend=False):
        """If an ARD kernel is present, it bar-plots the ARD parameters,
        """If an ARD kernel is present, it bar-plots the ARD parameters.

        :param fignum: figure number of the plot
        :param ax: matplotlib axis to plot on
        :param title:
            title of the plot,
            pass '' to not print a title
            pass None for a generic title

        """
        if ax is None:
            fig = pb.figure(fignum)

@@ -175,8 +177,10 @@ class kern(Parameterized):
    def add(self, other, tensor=False):
        """
        Add another kernel to this one. Both kernels are defined on the same _space_

        :param other: the other kernel to be added
        :type other: GPy.kern

        """
        if tensor:
            D = self.input_dim + other.input_dim
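A usage sketch of `add` (following the signature above; operator overloading such as `k1 + k2` is not shown in this diff, so only the explicit call is used):

    import GPy

    k1 = GPy.kern.rbf(input_dim=2)
    k2 = GPy.kern.white(input_dim=2)

    # same space: k(x, y) = k1(x, y) + k2(x, y)
    k_sum = k1.add(k2)

    # tensor=True concatenates the input spaces: the result is a (2 + 2)-D kernel
    k_tensor = k1.add(GPy.kern.rbf(input_dim=2), tensor=True)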
@@ -218,11 +222,13 @@ class kern(Parameterized):

    def prod(self, other, tensor=False):
        """
        multiply two kernels (either on the same space, or on the tensor product of the input space).
        Multiply two kernels (either on the same space, or on the tensor product of the input space).

        :param other: the other kernel to be multiplied
        :type other: GPy.kern
        :param tensor: whether or not to use the tensor space (default is false).
        :type tensor: bool

        """
        K1 = self.copy()
        K2 = other.copy()

@@ -321,6 +327,7 @@ class kern(Parameterized):
        :type X: np.ndarray (num_samples x input_dim)
        :param X2: Observed data inputs (optional, defaults to X)
        :type X2: np.ndarray (num_inducing x input_dim)

        """
        assert X.shape[1] == self.input_dim
        target = np.zeros(self.num_params)

@@ -340,6 +347,7 @@ class kern(Parameterized):
        :type X: np.ndarray (num_samples x input_dim)
        :param X2: Observed data inputs (optional, defaults to X)
        :type X2: np.ndarray (num_inducing x input_dim)"""

        target = np.zeros_like(X)
        if X2 is None:
            [p.dK_dX(dL_dK, X[:, i_s], None, target[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)]

@@ -413,6 +421,7 @@ class kern(Parameterized):
        :param Z: np.ndarray of inducing inputs (num_inducing x input_dim)
        :param mu, S: np.ndarrays of means and variances (each num_samples x input_dim)
        :returns psi2: np.ndarray (num_samples,num_inducing,num_inducing)

        """
        target = np.zeros((mu.shape[0], Z.shape[0], Z.shape[0]))
        [p.psi2(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self.parts, self.input_slices)]

@@ -568,7 +577,7 @@ class Kern_check_model(Model):

    def is_positive_definite(self):
        v = np.linalg.eig(self.kernel.K(self.X))[0]
        if any(v<0):
        if any(v<-1e-6):
            return False
        else:
            return True
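The relaxed threshold above tolerates tiny negative eigenvalues caused by floating-point round-off; a standalone sketch of the same check (using `eigvalsh`, a hypothetical refinement that exploits the symmetry of K):

    import numpy as np

    def is_positive_semidefinite(K, tol=1e-6):
        # symmetric matrices have real eigenvalues; eigvalsh is the stable choice here
        v = np.linalg.eigvalsh(K)
        return bool(np.all(v > -tol))  # tolerate tiny negatives from round-off

    K = np.array([[1.0, 0.9], [0.9, 1.0]])
    assert is_positive_semidefinite(K)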
@@ -657,6 +666,7 @@ def kern_test(kern, X=None, X2=None, verbose=False):
    :type X: ndarray
    :param X2: X2 input values to test the covariance function.
    :type X2: ndarray

    """
    pass_checks = True
    if X==None:

@@ -683,7 +693,7 @@ def kern_test(kern, X=None, X2=None, verbose=False):
        Kern_check_dK_dtheta(kern, X=X, X2=None).checkgrad(verbose=True)
        pass_checks = False
        return False

    if verbose:
        print("Checking gradients of K(X, X2) wrt theta.")
    result = Kern_check_dK_dtheta(kern, X=X, X2=X2).checkgrad(verbose=verbose)

@@ -694,7 +704,7 @@ def kern_test(kern, X=None, X2=None, verbose=False):
        Kern_check_dK_dtheta(kern, X=X, X2=X2).checkgrad(verbose=True)
        pass_checks = False
        return False

    if verbose:
        print("Checking gradients of Kdiag(X) wrt theta.")
    result = Kern_check_dKdiag_dtheta(kern, X=X).checkgrad(verbose=verbose)

@@ -705,10 +715,15 @@ def kern_test(kern, X=None, X2=None, verbose=False):
        Kern_check_dKdiag_dtheta(kern, X=X).checkgrad(verbose=True)
        pass_checks = False
        return False

    if verbose:
        print("Checking gradients of K(X, X) wrt X.")
    result = Kern_check_dK_dX(kern, X=X, X2=None).checkgrad(verbose=verbose)
    try:
        result = Kern_check_dK_dX(kern, X=X, X2=None).checkgrad(verbose=verbose)
    except NotImplementedError:
        result=True
        if verbose:
            print("dK_dX not implemented for " + kern.name)
    if result and verbose:
        print("Check passed.")
    if not result:
@@ -719,7 +734,12 @@ def kern_test(kern, X=None, X2=None, verbose=False):

    if verbose:
        print("Checking gradients of K(X, X2) wrt X.")
    result = Kern_check_dK_dX(kern, X=X, X2=X2).checkgrad(verbose=verbose)
    try:
        result = Kern_check_dK_dX(kern, X=X, X2=X2).checkgrad(verbose=verbose)
    except NotImplementedError:
        result=True
        if verbose:
            print("dK_dX not implemented for " + kern.name)
    if result and verbose:
        print("Check passed.")
    if not result:

@@ -730,7 +750,12 @@ def kern_test(kern, X=None, X2=None, verbose=False):

    if verbose:
        print("Checking gradients of Kdiag(X) wrt X.")
    result = Kern_check_dKdiag_dX(kern, X=X).checkgrad(verbose=verbose)
    try:
        result = Kern_check_dKdiag_dX(kern, X=X).checkgrad(verbose=verbose)
    except NotImplementedError:
        result=True
        if verbose:
            print("dK_dX not implemented for " + kern.name)
    if result and verbose:
        print("Check passed.")
    if not result:

@@ -738,5 +763,5 @@ def kern_test(kern, X=None, X2=None, verbose=False):
        Kern_check_dKdiag_dX(kern, X=X).checkgrad(verbose=True)
        pass_checks = False
        return False

    return pass_checks
@@ -11,12 +11,13 @@ class Coregionalize(Kernpart):
    """
    Covariance function for intrinsic/linear coregionalization models

    This covariance has the form
    This covariance has the form:
    .. math::
        \mathbf{B} = \mathbf{W}\mathbf{W}^\top + \text{diag}(kappa)

    An intrinsic/linear coregionalization covariance function of the form
    An intrinsic/linear coregionalization covariance function of the form:
    .. math::

        k_2(x, y)=\mathbf{B} k(x, y)

    it is obtained as the tensor product between a covariance function

@@ -31,7 +32,7 @@ class Coregionalize(Kernpart):
    :param kappa: a vector which allows the outputs to behave independently
    :type kappa: numpy array of dimensionality (output_dim,)

    .. Note: see coregionalization examples in GPy.examples.regression for some usage.
    .. note: see coregionalization examples in GPy.examples.regression for some usage.
    """
    def __init__(self, output_dim, rank=1, W=None, kappa=None):
        self.input_dim = 1
@@ -10,9 +10,12 @@ import GPy

class Hetero(Kernpart):
    """
    TODO: Need to constrain the function outputs positive (still thinking of best way of doing this!!! Yes, intend to use transformations, but what's the *best* way). Currently just squaring output.
    TODO: Need to constrain the function outputs
    positive (still thinking of best way of doing this!!! Yes, intend to use
    transformations, but what's the *best* way). Currently just squaring output.

    Heteroschedastic noise which depends on input location. See, for example, this paper by Goldberg et al.
    Heteroscedastic noise which depends on input location. See, for example,
    this paper by Goldberg et al.

    .. math::

@@ -20,15 +23,15 @@ class Hetero(Kernpart):

    where :math:`\sigma^2(x)` is a function giving the variance as a function of input space and :math:`\delta_{i,j}` is the Kronecker delta function.

    The parameters are the parameters of \sigma^2(x) which is a
    function that can be specified by the user, by default an
    multi-layer peceptron is used.
    The parameters are the parameters of \sigma^2(x), which is a
    function that can be specified by the user; by default a
    multi-layer perceptron is used.

    :param input_dim: the number of input dimensions
    :type input_dim: int
    :param mapping: the mapping that gives the lengthscale across the input space (by default GPy.mappings.MLP is used with 20 hidden nodes).
    :type mapping: GPy.core.Mapping
    :rtype: Kernpart object
    :param input_dim: the number of input dimensions
    :type input_dim: int
    :param mapping: the mapping that gives the lengthscale across the input space (by default GPy.mappings.MLP is used with 20 hidden nodes).
    :type mapping: GPy.core.Mapping
    :rtype: Kernpart object

    See this paper:

@@ -36,7 +39,7 @@ class Hetero(Kernpart):
    C. M. (1998) Regression with Input-dependent Noise: a Gaussian
    Process Treatment In Advances in Neural Information Processing
    Systems, Volume 10, pp. 493-499. MIT Press

    for a Gaussian process treatment of this problem.

    """
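The resulting model adds input-dependent noise to the diagonal of an underlying covariance; a toy numpy sketch (hypothetical `noise_fn` standing in for the mapping):

    import numpy as np

    def hetero_diag(X, noise_fn):
        # sigma^2(x) is modelled as the square of an unconstrained function,
        # which is the "just squaring output" mentioned in the TODO above
        return noise_fn(X).flatten() ** 2

    X = np.linspace(0.0, 1.0, 5)[:, None]
    K = np.eye(5)  # stand-in for an underlying kernel matrix K(X, X)
    K[np.diag_indices_from(K)] += hetero_diag(X, lambda X: 0.1 + X)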
@@ -47,7 +50,7 @@ class Hetero(Kernpart):
            mapping = GPy.mappings.MLP(output_dim=1, hidden_dim=20, input_dim=input_dim)
        if not transform:
            transform = GPy.core.transformations.logexp()

        self.transform = transform
        self.mapping = mapping
        self.name='hetero'

@@ -66,7 +69,7 @@ class Hetero(Kernpart):

    def K(self, X, X2, target):
        """Return covariance between X and X2."""
        if X2==None or X2 is X:
        if (X2 is None) or (X2 is X):
            target[np.diag_indices_from(target)] += self._Kdiag(X)

    def Kdiag(self, X, target):

@@ -76,26 +79,26 @@ class Hetero(Kernpart):
    def _Kdiag(self, X):
        """Helper function for computing the diagonal elements of the covariance."""
        return self.mapping.f(X).flatten()**2

    def dK_dtheta(self, dL_dK, X, X2, target):
        """Derivative of the covariance with respect to the parameters."""
        if X2==None or X2 is X:
        if (X2 is None) or (X2 is X):
            dL_dKdiag = dL_dK.flat[::dL_dK.shape[0]+1]
            self.dKdiag_dtheta(dL_dKdiag, X, target)

    def dKdiag_dtheta(self, dL_dKdiag, X, target):
        """Gradient of diagonal of covariance with respect to parameters."""
        target += 2.*self.mapping.df_dtheta(dL_dKdiag[:, None], X)*self.mapping.f(X)
        target += 2.*self.mapping.df_dtheta(dL_dKdiag[:, None]*self.mapping.f(X), X)

    def dK_dX(self, dL_dK, X, X2, target):
        """Derivative of the covariance matrix with respect to X."""
        if X2==None or X2 is X:
            dL_dKdiag = dL_dK.flat[::dL_dK.shape[0]+1]
            self.dKdiag_dX(dL_dKdiag, X, target)

    def dKdiag_dX(self, dL_dKdiag, X, target):
        """Gradient of diagonal of covariance with respect to X."""
        target += 2.*self.mapping.df_dX(dL_dKdiag[:, None], X)*self.mapping.f(X)
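The dKdiag_dtheta change above is a chain-rule fix worth spelling out. The diagonal is Kdiag(x) = f(x)^2, so dKdiag/dtheta = 2 f(x) df(x)/dtheta; the weighted sum over data points, sum_i dL_dKdiag_i * 2 f(x_i) * df(x_i)/dtheta, must fold the factor f(x_i) into the per-point weights before `df_dtheta` reduces over the data axis, which is what the new line does. A scalar sanity check of that identity (toy one-parameter mapping, purely illustrative):

    import numpy as np

    theta = 0.7
    f = lambda X, th: th * X   # toy mapping f(x) = theta * x
    df = lambda X: X           # df/dtheta for the toy mapping

    X = np.array([[1.0], [2.0], [3.0]])
    dL_dKdiag = np.array([0.1, 0.2, 0.3])

    # correct gradient: weight each df(x_i)/dtheta by f(x_i) inside the sum
    grad = np.sum(2.0 * dL_dKdiag[:, None] * f(X, theta) * df(X))

    # finite-difference check of d/dtheta sum_i dL_i * f(x_i)^2
    eps = 1e-6
    obj = lambda th: np.sum(dL_dKdiag[:, None] * f(X, th) ** 2)
    assert abs(grad - (obj(theta + eps) - obj(theta)) / eps) < 1e-4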
@@ -58,6 +58,8 @@ class Kernpart(object):
        raise NotImplementedError
    def dK_dX(self, dL_dK, X, X2, target):
        raise NotImplementedError
    def dKdiag_dX(self, dL_dK, X, target):
        raise NotImplementedError

@@ -97,6 +99,9 @@ class Kernpart_stationary(Kernpart):
        # wrt lengthscale is 0.
        target[0] += np.sum(dL_dKdiag)

    def dKdiag_dX(self, dL_dK, X, target):
        pass # true for all stationary kernels

class Kernpart_inner(Kernpart):
    def __init__(self,input_dim):
@@ -7,11 +7,13 @@ four_over_tau = 2./np.pi

class MLP(Kernpart):
    """
    multi layer perceptron kernel (also known as arc sine kernel or neural network kernel)

    Multi layer perceptron kernel (also known as arc sine kernel or neural network kernel)

    .. math::

        k(x,y) = \sigma^2 \frac{2}{\pi} \text{asin} \left(\frac{\sigma_w^2 x^\top y+\sigma_b^2}{\sqrt{\sigma_w^2x^\top x + \sigma_b^2 + 1}\sqrt{\sigma_w^2 y^\top y \sigma_b^2 +1}} \right)
        k(x,y) = \\sigma^{2}\\frac{2}{\\pi } \\text{asin} \\left ( \\frac{ \\sigma_w^2 x^\\top y+\\sigma_b^2}{\\sqrt{\\sigma_w^2x^\\top x + \\sigma_b^2 + 1}\\sqrt{\\sigma_w^2 y^\\top y + \\sigma_b^2 +1}} \\right )

    :param input_dim: the number of input dimensions
    :type input_dim: int
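A plain numpy sketch of the arc-sine formula above (illustrative, non-ARD case; both denominator factors have the form sigma_w^2 z'z + sigma_b^2 + 1):

    import numpy as np

    def mlp_K(X, Y, variance, weight_variance, bias_variance):
        # k(x,y) = sigma^2 * (2/pi) * asin( (sw2 x'y + sb2) / sqrt((sw2 x'x + sb2 + 1)(sw2 y'y + sb2 + 1)) )
        inner = weight_variance * X @ Y.T + bias_variance
        xx = weight_variance * np.sum(X * X, axis=1) + bias_variance + 1.0
        yy = weight_variance * np.sum(Y * Y, axis=1) + bias_variance + 1.0
        return variance * (2.0 / np.pi) * np.arcsin(inner / np.sqrt(np.outer(xx, yy)))

    X = np.random.randn(4, 3)
    K = mlp_K(X, X, variance=1.0, weight_variance=1.0, bias_variance=100.0)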
@@ -24,6 +26,7 @@ class MLP(Kernpart):
    :type ARD: Boolean
    :rtype: Kernpart object

    """

    def __init__(self, input_dim, variance=1., weight_variance=None, bias_variance=100., ARD=False):
@@ -7,22 +7,22 @@ four_over_tau = 2./np.pi

class POLY(Kernpart):
    """
    polynomial kernel parameter initialisation. Included for completeness, but generally not recommended, is the polynomial kernel,
    .. math::

        k(x, y) = \sigma^2*(\sigma_w^2 x'y+\sigma_b^b)^d

    The kernel parameters are \sigma^2 (variance), \sigma^2_w
    (weight_variance), \sigma^2_b (bias_variance) and d
    Polynomial kernel parameter initialisation. Included for completeness, but generally not recommended, is the polynomial kernel:

    .. math::
        k(x, y) = \sigma^2\*(\sigma_w^2 x'y+\sigma_b^2)^d

    The kernel parameters are :math:`\sigma^2` (variance), :math:`\sigma^2_w`
    (weight_variance), :math:`\sigma^2_b` (bias_variance) and d
    (degree). Only gradients of the first three are provided for
    kernel optimisation, it is assumed that polynomial degree would
    be set by hand.

    The kernel is not recommended as it is badly behaved when the
    \sigma^2_w*x'*y + \sigma^2_b has a magnitude greater than one. For completeness
    there will be an automatic relevance determination version of this
    kernel provided (NOT YET IMPLEMENTED!).

    :math:`\sigma^2_w\*x'\*y + \sigma^2_b` has a magnitude greater than one. For completeness
    there will be an automatic relevance determination version of this
    kernel provided (NOT YET IMPLEMENTED!).
    :param input_dim: the number of input dimensions
    :type input_dim: int
    :param variance: the variance :math:`\sigma^2`
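A direct numpy sketch of the polynomial form (illustrative, non-ARD):

    import numpy as np

    def poly_K(X, Y, variance, weight_variance, bias_variance, degree):
        # k(x, y) = sigma^2 * (sigma_w^2 x'y + sigma_b^2)^d
        return variance * (weight_variance * X @ Y.T + bias_variance) ** degree

    X = np.random.randn(4, 3)
    K = poly_K(X, X, variance=1.0, weight_variance=1.0, bias_variance=1.0, degree=2)

As the docstring warns, once sigma_w^2 x'y + sigma_b^2 has magnitude greater than one, the entries grow as its d-th power, which is what makes the kernel badly behaved.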
@@ -32,7 +32,7 @@ class POLY(Kernpart):
    :param bias_variance: the variance of the prior over bias parameters :math:`\sigma^2_b`
    :param degree: the degree of the polynomial.
    :type degree: int
    :param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one weight variance parameter \sigma^2_w), otherwise there is one weight variance parameter per dimension.
    :param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (i.e. one weight variance parameter :math:`\sigma^2_w`), otherwise there is one weight variance parameter per dimension.
    :type ARD: Boolean
    :rtype: Kernpart object