Merge branch 'devel' into params
commit 4f56506aa6
60 changed files with 1944 additions and 596 deletions
@@ -17,6 +17,7 @@ def rbf_inv(input_dim,variance=1., inv_lengthscale=None,ARD=False):
    :type lengthscale: float
    :param ARD: Auto Relevance Determination (one lengthscale per dimension)
    :type ARD: Boolean

    """
    part = parts.rbf_inv.RBFInv(input_dim,variance,inv_lengthscale,ARD)
    return kern(input_dim, [part])
@@ -33,6 +34,7 @@ def rbf(input_dim,variance=1., lengthscale=None,ARD=False):
    :type lengthscale: float
    :param ARD: Auto Relevance Determination (one lengthscale per dimension)
    :type ARD: Boolean

    """
    part = parts.rbf.RBF(input_dim,variance,lengthscale,ARD)
    return kern(input_dim, [part])
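For orientation, a minimal usage sketch of the constructor above (an illustration, not part of this commit; it assumes the GPy.kern namespace shown in this diff and the kern.K method defined further down):

import numpy as np
import GPy

# 2-D RBF with one lengthscale per input dimension (ARD).
k = GPy.kern.rbf(input_dim=2, variance=1.0, lengthscale=np.ones(2), ARD=True)
X = np.random.randn(10, 2)
K = k.K(X)  # 10 x 10 covariance matrix
assert np.allclose(K, K.T)  # a covariance matrix is symmetric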
@@ -41,11 +43,13 @@ def linear(input_dim,variances=None,ARD=False):
    """
    Construct a linear kernel.

    Arguments
    ---------
    input_dim (int), obligatory
    variances (np.ndarray)
    ARD (boolean)
    :param input_dim: dimensionality of the kernel, obligatory
    :type input_dim: int
    :param variances:
    :type variances: np.ndarray
    :param ARD: Auto Relevance Determination (one lengthscale per dimension)
    :type ARD: Boolean

    """
    part = parts.linear.Linear(input_dim,variances,ARD)
    return kern(input_dim, [part])
@@ -64,39 +68,42 @@ def mlp(input_dim,variance=1., weight_variance=None,bias_variance=100.,ARD=False
    :type bias_variance: float
    :param ARD: Auto Relevance Determination (allows for ARD version of covariance)
    :type ARD: Boolean

    """
    part = parts.mlp.MLP(input_dim,variance,weight_variance,bias_variance,ARD)
    return kern(input_dim, [part])

def gibbs(input_dim,variance=1., mapping=None):
    """

    Gibbs and MacKay non-stationary covariance function.

    .. math::

        r = sqrt((x_i - x_j)'*(x_i - x_j))
        r = \\sqrt{((x_i - x_j)'*(x_i - x_j))}

        k(x_i, x_j) = \sigma^2*Z*exp(-r^2/(l(x)*l(x) + l(x')*l(x')))
        k(x_i, x_j) = \\sigma^2*Z*exp(-r^2/(l(x)*l(x) + l(x')*l(x')))

        Z = \sqrt{2*l(x)*l(x')/(l(x)*l(x) + l(x')*l(x'))}
        Z = \\sqrt{2*l(x)*l(x')/(l(x)*l(x) + l(x')*l(x'))}

    where :math:`l(x)` is a function giving the length scale as a function of space.
    This is the non stationary kernel proposed by Mark Gibbs in his 1997
    thesis. It is similar to an RBF but has a length scale that varies
    with input location. This leads to an additional term in front of
    the kernel.
    Where :math:`l(x)` is a function giving the length scale as a function of space.

    The parameters are :math:`\sigma^2`, the process variance, and the parameters of l(x), which is a function that can be specified by the user; by default a multi-layer perceptron is used.
    This is the non stationary kernel proposed by Mark Gibbs in his 1997
    thesis. It is similar to an RBF but has a length scale that varies
    with input location. This leads to an additional term in front of
    the kernel.

    :param input_dim: the number of input dimensions
    :type input_dim: int
    :param variance: the variance :math:`\sigma^2`
    :type variance: float
    :param mapping: the mapping that gives the lengthscale across the input space.
    :type mapping: GPy.core.Mapping
    :param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one weight variance parameter \sigma^2_w), otherwise there is one weight variance parameter per dimension.
    :type ARD: Boolean
    :rtype: Kernpart object
    The parameters are :math:`\\sigma^2`, the process variance, and the parameters of l(x), which is a function that can be specified by the user; by default a multi-layer perceptron is used.

    :param input_dim: the number of input dimensions
    :type input_dim: int
    :param variance: the variance :math:`\\sigma^2`
    :type variance: float
    :param mapping: the mapping that gives the lengthscale across the input space.
    :type mapping: GPy.core.Mapping
    :param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one weight variance parameter :math:`\\sigma^2_w`), otherwise there is one weight variance parameter per dimension.
    :type ARD: Boolean
    :rtype: Kernpart object

    """
    part = parts.gibbs.Gibbs(input_dim,variance,mapping)
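The formulas above translate directly to NumPy. A one-dimensional sketch (illustration only, not code from this commit; `l` is a user-chosen positive lengthscale function standing in for the `mapping` argument):

import numpy as np

def gibbs_K(X, X2, variance, l):
    # X: (n, 1), X2: (m, 1); l returns the lengthscale at each input.
    lx, lx2 = l(X), l(X2)
    s = lx**2 + (lx2**2).T                 # l(x)^2 + l(x')^2, shape (n, m)
    Z = np.sqrt(2.0 * lx * lx2.T / s)      # extra normalising term in front of the kernel
    r2 = (X - X2.T)**2                     # squared distances r^2
    return variance * Z * np.exp(-r2 / s)

K = gibbs_K(np.linspace(0, 1, 5)[:, None], np.linspace(0, 1, 5)[:, None],
            variance=1.0, l=lambda x: 1.0 + x**2)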
@@ -124,6 +131,7 @@ def poly(input_dim,variance=1., weight_variance=None,bias_variance=1.,degree=2,
    :type degree: int
    :param ARD: Auto Relevance Determination (allows for ARD version of covariance)
    :type ARD: Boolean

    """
    part = parts.poly.POLY(input_dim,variance,weight_variance,bias_variance,degree,ARD)
    return kern(input_dim, [part])
@@ -132,14 +140,42 @@ def white(input_dim,variance=1.):
    """
    Construct a white kernel.

    Arguments
    ---------
    input_dim (int), obligatory
    variance (float)
    :param input_dim: dimensionality of the kernel, obligatory
    :type input_dim: int
    :param variance: the variance of the kernel
    :type variance: float

    """
    part = parts.white.White(input_dim,variance)
    return kern(input_dim, [part])

def eq_ode1(output_dim, W=None, rank=1, kappa=None, length_scale=1., decay=None, delay=None):
    """Covariance function for first order differential equation driven by an exponentiated quadratic covariance.

    The outputs of this kernel have the form

    .. math::

        \frac{\text{d}y_j}{\text{d}t} = \sum_{i=1}^R w_{j,i} f_i(t-\delta_j) + \sqrt{\kappa_j}g_j(t) - d_jy_j(t)

    where :math:`R` is the rank of the system, :math:`w_{j,i}` is the sensitivity of the :math:`j`th output to the :math:`i`th latent function, :math:`d_j` is the decay rate of the :math:`j`th output and :math:`f_i(t)` and :math:`g_i(t)` are independent latent Gaussian processes governed by an exponentiated quadratic covariance.

    :param output_dim: number of outputs driven by latent function.
    :type output_dim: int
    :param W: sensitivities of each output to the latent driving function.
    :type W: ndarray (output_dim x rank).
    :param rank: If rank is greater than 1 then there are assumed to be a total of rank latent forces independently driving the system, each with identical covariance.
    :type rank: int
    :param decay: decay rates for the first order system.
    :type decay: array of length output_dim.
    :param delay: delay between latent force and output response.
    :type delay: array of length output_dim.
    :param kappa: diagonal term that allows each latent output to have an independent component to the response.
    :type kappa: array of length output_dim.

    .. note:: see first order differential equation examples in GPy.examples.regression for some usage.
    """
    part = parts.eq_ode1.Eq_ode1(output_dim, W, rank, kappa, length_scale, decay, delay)
    return kern(2, [part])
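Per the error messages in Eq_ode1 further down this diff, inputs to this kernel carry a time column plus an output-index column (index 0 is the latent exponentiated quadratic process, indices above 0 are the ODE outputs). A hedged construction sketch:

import numpy as np

t = np.linspace(0., 5., 8)[:, None]
# stack times for the latent process (index 0) and the first output (index 1)
X = np.vstack([np.hstack([t, np.zeros_like(t)]),
               np.hstack([t, np.ones_like(t)])])
# k = eq_ode1(output_dim=2); K = k.K(X)   # assumed call pattern, per the diff's signatures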
def exponential(input_dim,variance=1., lengthscale=None, ARD=False):
    """
@@ -153,6 +189,7 @@ def exponential(input_dim,variance=1., lengthscale=None, ARD=False):
    :type lengthscale: float
    :param ARD: Auto Relevance Determination (one lengthscale per dimension)
    :type ARD: Boolean

    """
    part = parts.exponential.Exponential(input_dim,variance, lengthscale, ARD)
    return kern(input_dim, [part])
@@ -169,6 +206,7 @@ def Matern32(input_dim,variance=1., lengthscale=None, ARD=False):
    :type lengthscale: float
    :param ARD: Auto Relevance Determination (one lengthscale per dimension)
    :type ARD: Boolean

    """
    part = parts.Matern32.Matern32(input_dim,variance, lengthscale, ARD)
    return kern(input_dim, [part])
@@ -185,6 +223,7 @@ def Matern52(input_dim, variance=1., lengthscale=None, ARD=False):
    :type lengthscale: float
    :param ARD: Auto Relevance Determination (one lengthscale per dimension)
    :type ARD: Boolean

    """
    part = parts.Matern52.Matern52(input_dim, variance, lengthscale, ARD)
    return kern(input_dim, [part])
@@ -193,10 +232,11 @@ def bias(input_dim, variance=1.):
    """
    Construct a bias kernel.

    Arguments
    ---------
    input_dim (int), obligatory
    variance (float)
    :param input_dim: dimensionality of the kernel, obligatory
    :type input_dim: int
    :param variance: the variance of the kernel
    :type variance: float

    """
    part = parts.bias.Bias(input_dim, variance)
    return kern(input_dim, [part])
@@ -204,10 +244,15 @@ def bias(input_dim, variance=1.):

def finite_dimensional(input_dim, F, G, variances=1., weights=None):
    """
    Construct a finite dimensional kernel.
    input_dim: int - the number of input dimensions
    F: np.array of functions with shape (n,) - the n basis functions
    G: np.array with shape (n,n) - the Gram matrix associated to F
    variances : np.ndarray with shape (n,)

    :param input_dim: the number of input dimensions
    :type input_dim: int
    :param F: np.array of functions with shape (n,) - the n basis functions
    :type F: np.array
    :param G: np.array with shape (n,n) - the Gram matrix associated to F
    :type G: np.array
    :param variances: np.ndarray with shape (n,)
    :type variances: np.ndarray
    """
    part = parts.finite_dimensional.FiniteDimensional(input_dim, F, G, variances, weights)
    return kern(input_dim, [part])
@@ -220,6 +265,7 @@ def spline(input_dim, variance=1.):
    :type input_dim: int
    :param variance: the variance of the kernel
    :type variance: float

    """
    part = parts.spline.Spline(input_dim, variance)
    return kern(input_dim, [part])
@@ -232,43 +278,78 @@ def Brownian(input_dim, variance=1.):
    :type input_dim: int
    :param variance: the variance of the kernel
    :type variance: float

    """
    part = parts.Brownian.Brownian(input_dim, variance)
    return kern(input_dim, [part])

try:
    import sympy as sp
    from sympykern import spkern
    from sympy.parsing.sympy_parser import parse_expr
    sympy_available = True
except ImportError:
    sympy_available = False

if sympy_available:
    from parts.sympykern import spkern
    from sympy.parsing.sympy_parser import parse_expr
    from GPy.util.symbolic import sinc

    def rbf_sympy(input_dim, ARD=False, variance=1., lengthscale=1.):
        """
        Radial Basis Function covariance.
        """
        X = [sp.var('x%i' % i) for i in range(input_dim)]
        Z = [sp.var('z%i' % i) for i in range(input_dim)]
        rbf_variance = sp.var('rbf_variance',positive=True)
        variance = sp.var('variance',positive=True)
        if ARD:
            rbf_lengthscales = [sp.var('rbf_lengthscale_%i' % i, positive=True) for i in range(input_dim)]
            dist_string = ' + '.join(['(x%i-z%i)**2/rbf_lengthscale_%i**2' % (i, i, i) for i in range(input_dim)])
            lengthscales = [sp.var('lengthscale_%i' % i, positive=True) for i in range(input_dim)]
            dist_string = ' + '.join(['(x%i-z%i)**2/lengthscale_%i**2' % (i, i, i) for i in range(input_dim)])
            dist = parse_expr(dist_string)
            f = rbf_variance*sp.exp(-dist/2.)
            f = variance*sp.exp(-dist/2.)
        else:
            rbf_lengthscale = sp.var('rbf_lengthscale',positive=True)
            lengthscale = sp.var('lengthscale',positive=True)
            dist_string = ' + '.join(['(x%i-z%i)**2' % (i, i) for i in range(input_dim)])
            dist = parse_expr(dist_string)
            f = rbf_variance*sp.exp(-dist/(2*rbf_lengthscale**2))
            return kern(input_dim, [spkern(input_dim, f)])
            f = variance*sp.exp(-dist/(2*lengthscale**2))
        return kern(input_dim, [spkern(input_dim, f, name='rbf_sympy')])

    def sympykern(input_dim, k):
    def sinc(input_dim, ARD=False, variance=1., lengthscale=1.):
        """
        A kernel from a symbolic sympy representation
        TODO: Not clear why this isn't working, suggests argument of sinc is not a number.
        sinc covariance function
        """
        return kern(input_dim, [spkern(input_dim, k)])
        X = [sp.var('x%i' % i) for i in range(input_dim)]
        Z = [sp.var('z%i' % i) for i in range(input_dim)]
        variance = sp.var('variance',positive=True)
        if ARD:
            lengthscales = [sp.var('lengthscale_%i' % i, positive=True) for i in range(input_dim)]
            dist_string = ' + '.join(['(x%i-z%i)**2/lengthscale_%i**2' % (i, i, i) for i in range(input_dim)])
            dist = parse_expr(dist_string)
            f = variance*sinc(sp.pi*sp.sqrt(dist))
        else:
            lengthscale = sp.var('lengthscale',positive=True)
            dist_string = ' + '.join(['(x%i-z%i)**2' % (i, i) for i in range(input_dim)])
            dist = parse_expr(dist_string)
            f = variance*sinc(sp.pi*sp.sqrt(dist)/lengthscale)

        return kern(input_dim, [spkern(input_dim, f, name='sinc')])

    def sympykern(input_dim, k,name=None):
        """
        A base kernel object, where all the hard work is done by sympy.

        :param k: the covariance function
        :type k: a positive definite sympy function of x1, z1, x2, z2...

        To construct a new sympy kernel, you'll need to define:
        - a kernel function using a sympy object. Ensure that the kernel is of the form k(x,z).
        - that's it! we'll extract the variables from the function k.

        Note:
        - to handle multiple inputs, call them x1, z1, etc
        - to handle multiple correlated outputs, you'll need to define each covariance function and 'cross' variance function. TODO
        """
        return kern(input_dim, [spkern(input_dim, k,name)])
del sympy_available
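The sympykern route described above in a self-contained sketch (illustrative, mirroring the rbf_sympy construction in this diff; the final call is commented because it depends on this module being importable):

import sympy as sp

x0, z0 = sp.var('x0'), sp.var('z0')
variance = sp.var('variance', positive=True)
lengthscale = sp.var('lengthscale', positive=True)
# a positive definite expression in (x0, z0); spkern extracts the parameters from it
f = variance * sp.exp(-(x0 - z0)**2 / (2 * lengthscale**2))
# k = sympykern(input_dim=1, k=f)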
def periodic_exponential(input_dim=1, variance=1., lengthscale=None, period=2 * np.pi, n_freq=10, lower=0., upper=4 * np.pi):
@@ -285,6 +366,7 @@ def periodic_exponential(input_dim=1, variance=1., lengthscale=None, period=2 *
    :type period: float
    :param n_freq: the number of frequencies considered for the periodic subspace
    :type n_freq: int

    """
    part = parts.periodic_exponential.PeriodicExponential(input_dim, variance, lengthscale, period, n_freq, lower, upper)
    return kern(input_dim, [part])
@@ -303,6 +385,7 @@ def periodic_Matern32(input_dim, variance=1., lengthscale=None, period=2 * np.pi
    :type period: float
    :param n_freq: the number of frequencies considered for the periodic subspace
    :type n_freq: int

    """
    part = parts.periodic_Matern32.PeriodicMatern32(input_dim, variance, lengthscale, period, n_freq, lower, upper)
    return kern(input_dim, [part])
@@ -321,6 +404,7 @@ def periodic_Matern52(input_dim, variance=1., lengthscale=None, period=2 * np.pi
    :type period: float
    :param n_freq: the number of frequencies considered for the periodic subspace
    :type n_freq: int

    """
    part = parts.periodic_Matern52.PeriodicMatern52(input_dim, variance, lengthscale, period, n_freq, lower, upper)
    return kern(input_dim, [part])
@@ -334,6 +418,7 @@ def prod(k1,k2,tensor=False):
    :param tensor: The kernels are multiplied either as functions defined on the same input space (default) or on the product of the input spaces
    :type tensor: Boolean
    :rtype: kernel object

    """
    part = parts.prod.Prod(k1, k2, tensor)
    return kern(part.input_dim, [part])
@@ -346,30 +431,32 @@ def symmetric(k):
    k_.parts = [symmetric.Symmetric(p) for p in k.parts]
    return k_

def coregionalize(num_outputs,W_columns=1, W=None, kappa=None):
def coregionalize(output_dim,rank=1, W=None, kappa=None):
    """
    Coregionalization matrix B, of the form:

    .. math::

        \mathbf{B} = \mathbf{W}\mathbf{W}^\top + kappa \mathbf{I}

    An intrinsic/linear coregionalization kernel of the form
    An intrinsic/linear coregionalization kernel of the form:

    .. math::

        k_2(x, y)=\mathbf{B} k(x, y)

    it is obtained as the tensor product between a kernel k(x,y) and B.

    :param num_outputs: the number of outputs to coregionalize
    :type num_outputs: int
    :param W_columns: number of columns of the W matrix (this parameter is ignored if parameter W is not None)
    :type W_columns: int
    :param output_dim: the number of outputs to coregionalize
    :type output_dim: int
    :param rank: number of columns of the W matrix (this parameter is ignored if parameter W is not None)
    :type rank: int
    :param W: a low rank matrix that determines the correlations between the different outputs, together with kappa it forms the coregionalization matrix B
    :type W: numpy array of dimensionality (num_outputs, W_columns)
    :type W: numpy array of dimensionality (output_dim, rank)
    :param kappa: a vector which allows the outputs to behave independently
    :type kappa: numpy array of dimensionality (num_outputs,)
    :type kappa: numpy array of dimensionality (output_dim,)
    :rtype: kernel object

    """
    p = parts.coregionalize.Coregionalize(num_outputs,W_columns,W,kappa)
    p = parts.coregionalize.Coregionalize(output_dim,rank,W,kappa)
    return kern(1,[p])
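A quick standalone check (not from the diff) that the coregionalization matrix described above is positive semi-definite by construction:

import numpy as np

output_dim, rank = 3, 1
W = 0.5 * np.random.randn(output_dim, rank) / np.sqrt(rank)
kappa = 0.5 * np.ones(output_dim)
B = W.dot(W.T) + np.diag(kappa)                  # B = W W^T + diag(kappa)
assert np.all(np.linalg.eigvalsh(B) >= -1e-10)   # PSD up to round-off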
@@ -422,25 +509,26 @@ def independent_outputs(k):

def hierarchical(k):
    """
    TODO THis can't be right! Construct a kernel with independent outputs from an existing kernel
    TODO This can't be right! Construct a kernel with independent outputs from an existing kernel
    """
    # for sl in k.input_slices:
    #     assert (sl.start is None) and (sl.stop is None), "cannot adjust input slices! (TODO)"
    _parts = [parts.hierarchical.Hierarchical(k.parts)]
    return kern(k.input_dim+len(k.parts),_parts)

def build_lcm(input_dim, num_outputs, kernel_list = [], W_columns=1,W=None,kappa=None):
def build_lcm(input_dim, output_dim, kernel_list = [], rank=1,W=None,kappa=None):
    """
    Builds a kernel of a linear coregionalization model

    :input_dim: Input dimensionality
    :num_outputs: Number of outputs
    :output_dim: Number of outputs
    :kernel_list: List of coregionalized kernels, each element in the list will be multiplied by a different coregionalization matrix
    :type kernel_list: list of GPy kernels
    :param W_columns: number of columns of the coregionalization parameter matrix 'coregion_W'
    :type W_columns: integer
    :param rank: number of columns of the coregionalization parameter matrix 'coregion_W'
    :type rank: integer

    ..note the kernel's dimensionality is overwritten to fit input_dim

    ..Note the kernel's dimensionality is overwritten to fit input_dim
    """

    for k in kernel_list:
@@ -448,11 +536,31 @@ def build_lcm(input_dim, num_outputs, kernel_list = [], W_columns=1,W=None,kappa
        k.input_dim = input_dim
        warnings.warn("kernel's input dimension overwritten to fit input_dim parameter.")

    k_coreg = coregionalize(num_outputs,W_columns,W,kappa)
    k_coreg = coregionalize(output_dim,rank,W,kappa)
    kernel = kernel_list[0]**k_coreg.copy()

    for k in kernel_list[1:]:
        k_coreg = coregionalize(num_outputs,W_columns,W,kappa)
        k_coreg = coregionalize(output_dim,rank,W,kappa)
        kernel += k**k_coreg.copy()

    return kernel

def ODE_1(input_dim=1, varianceU=1., varianceY=1., lengthscaleU=None, lengthscaleY=None):
    """
    kernel resulting from a first order ODE with OU driving GP

    :param input_dim: the number of input dimensions, has to be equal to one
    :type input_dim: int
    :param varianceU: variance of the driving GP
    :type varianceU: float
    :param lengthscaleU: lengthscale of the driving GP
    :type lengthscaleU: float
    :param varianceY: 'variance' of the transfer function
    :type varianceY: float
    :param lengthscaleY: 'lengthscale' of the transfer function
    :type lengthscaleY: float
    :rtype: kernel object

    """
    part = parts.ODE_1.ODE_1(input_dim, varianceU, varianceY, lengthscaleU, lengthscaleY)
    return kern(input_dim, [part])
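Hedged usage sketch for build_lcm, following the signatures in this diff (`**` is the tensor-product operator the function uses internally; whether these names are exported at GPy.kern level is an assumption here):

import GPy

k_list = [GPy.kern.rbf(1), GPy.kern.Matern32(1)]
# each kernel is tensor-multiplied by its own coregionalization kernel
# and the resulting terms are summed
k_lcm = GPy.kern.build_lcm(input_dim=1, output_dim=2, kernel_list=k_list, rank=1)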
@@ -1,6 +1,7 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)

import sys
import numpy as np
import pylab as pb
from ..core.parameterized import Parameterized
@@ -79,13 +80,15 @@ class kern(Parameterized):


    def plot_ARD(self, fignum=None, ax=None, title='', legend=False):
        """If an ARD kernel is present, it bar-plots the ARD parameters,
        """If an ARD kernel is present, it bar-plots the ARD parameters.

        :param fignum: figure number of the plot
        :param ax: matplotlib axis to plot on
        :param title:
            title of the plot,
            pass '' to not print a title
            pass None for a generic title

        """
        if ax is None:
            fig = pb.figure(fignum)
@@ -176,8 +179,10 @@ class kern(Parameterized):
    def add(self, other, tensor=False):
        """
        Add another kernel to this one. Both kernels are defined on the same _space_

        :param other: the other kernel to be added
        :type other: GPy.kern

        """
        if tensor:
            D = self.input_dim + other.input_dim
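Hedged illustration of the two modes (assumes `add` returns the combined kernel, as the docstring above suggests):

import GPy

k_same = GPy.kern.rbf(2).add(GPy.kern.white(2))                 # same 2-D input space
k_tensor = GPy.kern.rbf(2).add(GPy.kern.white(2), tensor=True)  # 4-D product space
# in the tensor case the input dimension is the sum of the parts' dimensions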
@@ -219,11 +224,13 @@ class kern(Parameterized):

    def prod(self, other, tensor=False):
        """
        multiply two kernels (either on the same space, or on the tensor product of the input space).
        Multiply two kernels (either on the same space, or on the tensor product of the input space).

        :param other: the other kernel to be multiplied
        :type other: GPy.kern
        :param tensor: whether or not to use the tensor space (default is false).
        :type tensor: bool

        """
        K1 = self.copy()
        K2 = other.copy()
@@ -322,6 +329,7 @@ class kern(Parameterized):
        :type X: np.ndarray (num_samples x input_dim)
        :param X2: Observed data inputs (optional, defaults to X)
        :type X2: np.ndarray (num_inducing x input_dim)

        """
        assert X.shape[1] == self.input_dim
        target = np.zeros(self.num_params)
@@ -341,6 +349,7 @@ class kern(Parameterized):
        :type X: np.ndarray (num_samples x input_dim)
        :param X2: Observed data inputs (optional, defaults to X)
        :type X2: np.ndarray (num_inducing x input_dim)"""

        target = np.zeros_like(X)
        if X2 is None:
            [p.dK_dX(dL_dK, X[:, i_s], None, target[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)]
@@ -414,6 +423,7 @@ class kern(Parameterized):
        :param Z: np.ndarray of inducing inputs (num_inducing x input_dim)
        :param mu, S: np.ndarrays of means and variances (each num_samples x input_dim)
        :returns psi2: np.ndarray (num_samples,num_inducing,num_inducing)

        """
        target = np.zeros((mu.shape[0], Z.shape[0], Z.shape[0]))
        [p.psi2(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self.parts, self.input_slices)]
@@ -568,7 +578,7 @@ class Kern_check_model(Model):

    def is_positive_definite(self):
        v = np.linalg.eig(self.kernel.K(self.X))[0]
        if any(v<0):
        if any(v<-10*sys.float_info.epsilon):
            return False
        else:
            return True
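The tolerance above exists because eigenvalues of a valid covariance matrix routinely come out as tiny negative numbers through floating-point round-off, so a strict `any(v<0)` rejects perfectly good kernels. A hedged standalone equivalent (using eigvalsh, the cheaper and more stable choice for symmetric matrices; a design alternative to the eig call above, not the author's code):

import sys
import numpy as np

def is_psd(K, tol_mult=10):
    v = np.linalg.eigvalsh(K)   # K is symmetric, so eigvalsh suffices
    return not np.any(v < -tol_mult * sys.float_info.epsilon)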
@@ -657,6 +667,7 @@ def kern_test(kern, X=None, X2=None, verbose=False):
    :type X: ndarray
    :param X2: X2 input values to test the covariance function.
    :type X2: ndarray

    """
    pass_checks = True
    if X==None:
@@ -683,7 +694,7 @@ def kern_test(kern, X=None, X2=None, verbose=False):
        Kern_check_dK_dtheta(kern, X=X, X2=None).checkgrad(verbose=True)
        pass_checks = False
        return False

    if verbose:
        print("Checking gradients of K(X, X2) wrt theta.")
    result = Kern_check_dK_dtheta(kern, X=X, X2=X2).checkgrad(verbose=verbose)
@@ -694,7 +705,7 @@ def kern_test(kern, X=None, X2=None, verbose=False):
        Kern_check_dK_dtheta(kern, X=X, X2=X2).checkgrad(verbose=True)
        pass_checks = False
        return False

    if verbose:
        print("Checking gradients of Kdiag(X) wrt theta.")
    result = Kern_check_dKdiag_dtheta(kern, X=X).checkgrad(verbose=verbose)
@@ -705,10 +716,15 @@ def kern_test(kern, X=None, X2=None, verbose=False):
        Kern_check_dKdiag_dtheta(kern, X=X).checkgrad(verbose=True)
        pass_checks = False
        return False

    if verbose:
        print("Checking gradients of K(X, X) wrt X.")
    result = Kern_check_dK_dX(kern, X=X, X2=None).checkgrad(verbose=verbose)
    try:
        result = Kern_check_dK_dX(kern, X=X, X2=None).checkgrad(verbose=verbose)
    except NotImplementedError:
        result=True
        if verbose:
            print("dK_dX not implemented for " + kern.name)
    if result and verbose:
        print("Check passed.")
    if not result:
@@ -719,7 +735,12 @@ def kern_test(kern, X=None, X2=None, verbose=False):

    if verbose:
        print("Checking gradients of K(X, X2) wrt X.")
    result = Kern_check_dK_dX(kern, X=X, X2=X2).checkgrad(verbose=verbose)
    try:
        result = Kern_check_dK_dX(kern, X=X, X2=X2).checkgrad(verbose=verbose)
    except NotImplementedError:
        result=True
        if verbose:
            print("dK_dX not implemented for " + kern.name)
    if result and verbose:
        print("Check passed.")
    if not result:
@@ -730,7 +751,12 @@ def kern_test(kern, X=None, X2=None, verbose=False):

    if verbose:
        print("Checking gradients of Kdiag(X) wrt X.")
    result = Kern_check_dKdiag_dX(kern, X=X).checkgrad(verbose=verbose)
    try:
        result = Kern_check_dKdiag_dX(kern, X=X).checkgrad(verbose=verbose)
    except NotImplementedError:
        result=True
        if verbose:
            print("dK_dX not implemented for " + kern.name)
    if result and verbose:
        print("Check passed.")
    if not result:
@@ -738,5 +764,5 @@ def kern_test(kern, X=None, X2=None, verbose=False):
        Kern_check_dKdiag_dX(kern, X=X).checkgrad(verbose=True)
        pass_checks = False
        return False

    return pass_checks
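A hedged sketch of driving these checks on a kernel (it assumes kern_test is exported alongside the constructors, which this diff does not confirm):

import numpy as np
import GPy

X = np.random.randn(20, 2)
# runs the positive-definiteness and gradient checks defined above
assert GPy.kern.kern_test(GPy.kern.rbf(2), X=X, verbose=True)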
GPy/kern/parts/ODE_1.py (new file)
@@ -0,0 +1,161 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)


from kernpart import Kernpart
import numpy as np

class ODE_1(Kernpart):
    """
    kernel resulting from a first order ODE with OU driving GP

    :param input_dim: the number of input dimensions, has to be equal to one
    :type input_dim: int
    :param varianceU: variance of the driving GP
    :type varianceU: float
    :param lengthscaleU: lengthscale of the driving GP (sqrt(3)/lengthscaleU)
    :type lengthscaleU: float
    :param varianceY: 'variance' of the transfer function
    :type varianceY: float
    :param lengthscaleY: 'lengthscale' of the transfer function (1/lengthscaleY)
    :type lengthscaleY: float
    :rtype: kernel object

    """
    def __init__(self, input_dim=1, varianceU=1., varianceY=1., lengthscaleU=None, lengthscaleY=None):
        assert input_dim==1, "Only defined for input_dim = 1"
        self.input_dim = input_dim
        self.num_params = 4
        self.name = 'ODE_1'
        if lengthscaleU is not None:
            lengthscaleU = np.asarray(lengthscaleU)
            assert lengthscaleU.size == 1, "lengthscaleU should be one dimensional"
        else:
            lengthscaleU = np.ones(1)
        if lengthscaleY is not None:
            lengthscaleY = np.asarray(lengthscaleY)
            assert lengthscaleY.size == 1, "lengthscaleY should be one dimensional"
        else:
            lengthscaleY = np.ones(1)
        #lengthscaleY = 0.5
        self._set_params(np.hstack((varianceU, varianceY, lengthscaleU,lengthscaleY)))

    def _get_params(self):
        """return the value of the parameters."""
        return np.hstack((self.varianceU,self.varianceY, self.lengthscaleU,self.lengthscaleY))

    def _set_params(self, x):
        """set the value of the parameters."""
        assert x.size == self.num_params
        self.varianceU = x[0]
        self.varianceY = x[1]
        self.lengthscaleU = x[2]
        self.lengthscaleY = x[3]

    def _get_param_names(self):
        """return parameter names."""
        return ['varianceU','varianceY', 'lengthscaleU', 'lengthscaleY']


    def K(self, X, X2, target):
        """Compute the covariance matrix between X and X2."""
        if X2 is None: X2 = X
        # i1 = X[:,1]
        # i2 = X2[:,1]
        # X = X[:,0].reshape(-1,1)
        # X2 = X2[:,0].reshape(-1,1)
        dist = np.abs(X - X2.T)

        ly=1/self.lengthscaleY
        lu=np.sqrt(3)/self.lengthscaleU
        #ly=self.lengthscaleY
        #lu=self.lengthscaleU

        k1 = np.exp(-ly*dist)*(2*lu+ly)/(lu+ly)**2
        k2 = (np.exp(-lu*dist)*(ly-2*lu+lu*ly*dist-lu**2*dist) + np.exp(-ly*dist)*(2*lu-ly) ) / (ly-lu)**2
        k3 = np.exp(-lu*dist) * ( (1+lu*dist)/(lu+ly) + (lu)/(lu+ly)**2 )

        np.add(self.varianceU*self.varianceY*(k1+k2+k3), target, target)

    def Kdiag(self, X, target):
        """Compute the diagonal of the covariance matrix associated to X."""
        ly=1/self.lengthscaleY
        lu=np.sqrt(3)/self.lengthscaleU
        #ly=self.lengthscaleY
        #lu=self.lengthscaleU

        k1 = (2*lu+ly)/(lu+ly)**2
        k2 = (ly-2*lu + 2*lu-ly ) / (ly-lu)**2
        k3 = 1/(lu+ly) + (lu)/(lu+ly)**2

        np.add(self.varianceU*self.varianceY*(k1+k2+k3), target, target)

    def dK_dtheta(self, dL_dK, X, X2, target):
        """derivative of the covariance matrix with respect to the parameters."""
        if X2 is None: X2 = X
        dist = np.abs(X - X2.T)

        ly=1/self.lengthscaleY
        lu=np.sqrt(3)/self.lengthscaleU
        #ly=self.lengthscaleY
        #lu=self.lengthscaleU

        dk1theta1 = np.exp(-ly*dist)*2*(-lu)/(lu+ly)**3
        #c=np.sqrt(3)
        #t1=c/lu
        #t2=1/ly
        #dk1theta1=np.exp(-dist*ly)*t2*( (2*c*t2+2*t1)/(c*t2+t1)**2 -2*(2*c*t2*t1+t1**2)/(c*t2+t1)**3 )

        dk2theta1 = 1*(
            np.exp(-lu*dist)*dist*(-ly+2*lu-lu*ly*dist+dist*lu**2)*(ly-lu)**(-2) + np.exp(-lu*dist)*(-2+ly*dist-2*dist*lu)*(ly-lu)**(-2)
            +np.exp(-dist*lu)*(ly-2*lu+ly*lu*dist-dist*lu**2)*2*(ly-lu)**(-3)
            +np.exp(-dist*ly)*2*(ly-lu)**(-2)
            +np.exp(-dist*ly)*2*(2*lu-ly)*(ly-lu)**(-3)
            )

        dk3theta1 = np.exp(-dist*lu)*(lu+ly)**(-2)*((2*lu+ly+dist*lu**2+lu*ly*dist)*(-dist-2/(lu+ly))+2+2*lu*dist+ly*dist)

        dktheta1 = self.varianceU*self.varianceY*(dk1theta1+dk2theta1+dk3theta1)


        dk1theta2 = np.exp(-ly*dist) * ((lu+ly)**(-2)) * ( (-dist)*(2*lu+ly) + 1 + (-2)*(2*lu+ly)/(lu+ly) )

        dk2theta2 = 1*(
            np.exp(-dist*lu)*(ly-lu)**(-2) * ( 1+lu*dist+(-2)*(ly-2*lu+lu*ly*dist-dist*lu**2)*(ly-lu)**(-1) )
            +np.exp(-dist*ly)*(ly-lu)**(-2) * ( (-dist)*(2*lu-ly) -1+(2*lu-ly)*(-2)*(ly-lu)**(-1) )
            )

        dk3theta2 = np.exp(-dist*lu) * (-3*lu-ly-dist*lu**2-lu*ly*dist)/(lu+ly)**3

        dktheta2 = self.varianceU*self.varianceY*(dk1theta2 + dk2theta2 +dk3theta2)


        k1 = np.exp(-ly*dist)*(2*lu+ly)/(lu+ly)**2
        k2 = (np.exp(-lu*dist)*(ly-2*lu+lu*ly*dist-lu**2*dist) + np.exp(-ly*dist)*(2*lu-ly) ) / (ly-lu)**2
        k3 = np.exp(-lu*dist) * ( (1+lu*dist)/(lu+ly) + (lu)/(lu+ly)**2 )
        dkdvar = k1+k2+k3

        target[0] += np.sum(self.varianceY*dkdvar * dL_dK)
        target[1] += np.sum(self.varianceU*dkdvar * dL_dK)
        target[2] += np.sum(dktheta1*(-np.sqrt(3)*self.lengthscaleU**(-2)) * dL_dK)
        target[3] += np.sum(dktheta2*(-self.lengthscaleY**(-2)) * dL_dK)


    # def dKdiag_dtheta(self, dL_dKdiag, X, target):
    #     """derivative of the diagonal of the covariance matrix with respect to the parameters."""
    #     # NB: derivative of diagonal elements wrt lengthscale is 0
    #     target[0] += np.sum(dL_dKdiag)

    # def dK_dX(self, dL_dK, X, X2, target):
    #     """derivative of the covariance matrix with respect to X."""
    #     if X2 is None: X2 = X
    #     dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1))[:, :, None]
    #     ddist_dX = (X[:, None, :] - X2[None, :, :]) / self.lengthscale ** 2 / np.where(dist != 0., dist, np.inf)
    #     dK_dX = -np.transpose(self.variance * np.exp(-dist) * ddist_dX, (1, 0, 2))
    #     target += np.sum(dK_dX * dL_dK.T[:, :, None], 0)

    # def dKdiag_dX(self, dL_dKdiag, X, target):
    #     pass
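Minimal evaluation sketch for the part above (illustrative; note that Kernpart objects accumulate into a caller-supplied `target` array, as the np.add calls show; the import path follows this commit's layout):

import numpy as np
from GPy.kern.parts.ODE_1 import ODE_1

part = ODE_1(input_dim=1, varianceU=1., varianceY=1.)
X = np.linspace(0., 3., 5)[:, None]
K = np.zeros((5, 5))
part.K(X, None, K)   # writes the covariance into K in place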
@@ -2,16 +2,18 @@ import bias
import Brownian
import coregionalize
import exponential
import eq_ode1
import finite_dimensional
import fixed
import gibbs
#import hetero #hetero.py is not committed: omitting for now. JH.
import hetero
import hierarchical
import independent_outputs
import linear
import Matern32
import Matern52
import mlp
import ODE_1
import periodic_exponential
import periodic_Matern32
import periodic_Matern52
@@ -11,44 +11,47 @@ class Coregionalize(Kernpart):
    """
    Covariance function for intrinsic/linear coregionalization models

    This covariance has the form
    This covariance has the form:

    .. math::

        \mathbf{B} = \mathbf{W}\mathbf{W}^\top + kappa \mathbf{I}
        \mathbf{B} = \mathbf{W}\mathbf{W}^\top + \text{diag}(kappa)

    An intrinsic/linear coregionalization covariance function of the form
    An intrinsic/linear coregionalization covariance function of the form:

    .. math::

        k_2(x, y)=\mathbf{B} k(x, y)

    it is obtained as the tensor product between a covariance function
    k(x,y) and B.

    :param num_outputs: number of outputs to coregionalize
    :type num_outputs: int
    :param W_columns: number of columns of the W matrix (this parameter is ignored if parameter W is not None)
    :type W_columns: int
    :param output_dim: number of outputs to coregionalize
    :type output_dim: int
    :param rank: number of columns of the W matrix (this parameter is ignored if parameter W is not None)
    :type rank: int
    :param W: a low rank matrix that determines the correlations between the different outputs, together with kappa it forms the coregionalization matrix B
    :type W: numpy array of dimensionality (num_outputs, W_columns)
    :param kappa: a vector which allows the outputs to behave independently
    :type kappa: numpy array of dimensionality (num_outputs,)
    :type kappa: numpy array of dimensionality (output_dim,)

    .. Note: see coregionalization examples in GPy.examples.regression for some usage.
    .. note:: see coregionalization examples in GPy.examples.regression for some usage.
    """
    def __init__(self,num_outputs,W_columns=1, W=None, kappa=None):
    def __init__(self, output_dim, rank=1, W=None, kappa=None):
        self.input_dim = 1
        self.name = 'coregion'
        self.num_outputs = num_outputs
        self.W_columns = W_columns
        self.output_dim = output_dim
        self.rank = rank
        if self.rank>output_dim-1:
            print("Warning: Unusual choice of rank, it should normally be less than the output_dim.")
        if W is None:
            self.W = 0.5*np.random.randn(self.num_outputs,self.W_columns)/np.sqrt(self.W_columns)
            self.W = 0.5*np.random.randn(self.output_dim,self.rank)/np.sqrt(self.rank)
        else:
            assert W.shape==(self.num_outputs,self.W_columns)
            assert W.shape==(self.output_dim,self.rank)
            self.W = W
        if kappa is None:
            kappa = 0.5*np.ones(self.num_outputs)
            kappa = 0.5*np.ones(self.output_dim)
        else:
            assert kappa.shape==(self.num_outputs,)
            assert kappa.shape==(self.output_dim,)
        self.kappa = kappa
        self.num_params = self.num_outputs*(self.W_columns + 1)
        self.num_params = self.output_dim*(self.rank + 1)
        self._set_params(np.hstack([self.W.flatten(),self.kappa]))

    def _get_params(self):
@@ -56,12 +59,12 @@ class Coregionalize(Kernpart):

    def _set_params(self,x):
        assert x.size == self.num_params
        self.kappa = x[-self.num_outputs:]
        self.W = x[:-self.num_outputs].reshape(self.num_outputs,self.W_columns)
        self.kappa = x[-self.output_dim:]
        self.W = x[:-self.output_dim].reshape(self.output_dim,self.rank)
        self.B = np.dot(self.W,self.W.T) + np.diag(self.kappa)

    def _get_param_names(self):
        return sum([['W%i_%i'%(i,j) for j in range(self.W_columns)] for i in range(self.num_outputs)],[]) + ['kappa_%i'%i for i in range(self.num_outputs)]
        return sum([['W%i_%i'%(i,j) for j in range(self.rank)] for i in range(self.output_dim)],[]) + ['kappa_%i'%i for i in range(self.output_dim)]

    def K(self,index,index2,target):
        index = np.asarray(index,dtype=np.int)
@@ -79,26 +82,26 @@ class Coregionalize(Kernpart):
        if index2 is None:
            code="""
            for(int i=0;i<N; i++){
                target[i+i*N] += B[index[i]+num_outputs*index[i]];
                target[i+i*N] += B[index[i]+output_dim*index[i]];
                for(int j=0; j<i; j++){
                    target[j+i*N] += B[index[i]+num_outputs*index[j]];
                    target[j+i*N] += B[index[i]+output_dim*index[j]];
                    target[i+j*N] += target[j+i*N];
                }
            }
            """
            N,B,num_outputs = index.size, self.B, self.num_outputs
            weave.inline(code,['target','index','N','B','num_outputs'])
            N,B,output_dim = index.size, self.B, self.output_dim
            weave.inline(code,['target','index','N','B','output_dim'])
        else:
            index2 = np.asarray(index2,dtype=np.int)
            code="""
            for(int i=0;i<num_inducing; i++){
                for(int j=0; j<N; j++){
                    target[i+j*num_inducing] += B[num_outputs*index[j]+index2[i]];
                    target[i+j*num_inducing] += B[output_dim*index[j]+index2[i]];
                }
            }
            """
            N,num_inducing,B,num_outputs = index.size,index2.size, self.B, self.num_outputs
            weave.inline(code,['target','index','index2','N','num_inducing','B','num_outputs'])
            N,num_inducing,B,output_dim = index.size,index2.size, self.B, self.output_dim
            weave.inline(code,['target','index','index2','N','num_inducing','B','output_dim'])


    def Kdiag(self,index,target):
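The weave loops above amount to gathering entries of B selected by the output indices; a NumPy equivalent (a sketch, not code from the diff):

import numpy as np

def coreg_K(B, index, index2=None):
    index = np.asarray(index, dtype=int).flatten()
    index2 = index if index2 is None else np.asarray(index2, dtype=int).flatten()
    return B[np.ix_(index, index2)]   # K[i, j] = B[index[i], index2[j]]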
@@ -115,12 +118,12 @@ class Coregionalize(Kernpart):
        code="""
        for(int i=0; i<num_inducing; i++){
            for(int j=0; j<N; j++){
                dL_dK_small[index[j] + num_outputs*index2[i]] += dL_dK[i+j*num_inducing];
                dL_dK_small[index[j] + output_dim*index2[i]] += dL_dK[i+j*num_inducing];
            }
        }
        """
        N, num_inducing, num_outputs = index.size, index2.size, self.num_outputs
        weave.inline(code, ['N','num_inducing','num_outputs','dL_dK','dL_dK_small','index','index2'])
        N, num_inducing, output_dim = index.size, index2.size, self.output_dim
        weave.inline(code, ['N','num_inducing','output_dim','dL_dK','dL_dK_small','index','index2'])

        dkappa = np.diag(dL_dK_small)
        dL_dK_small += dL_dK_small.T
@@ -137,8 +140,8 @@ class Coregionalize(Kernpart):
        ii,jj = ii.T, jj.T

        dL_dK_small = np.zeros_like(self.B)
        for i in range(self.num_outputs):
            for j in range(self.num_outputs):
        for i in range(self.output_dim):
            for j in range(self.output_dim):
                tmp = np.sum(dL_dK[(ii==i)*(jj==j)])
                dL_dK_small[i,j] = tmp

@@ -150,8 +153,8 @@ class Coregionalize(Kernpart):

    def dKdiag_dtheta(self,dL_dKdiag,index,target):
        index = np.asarray(index,dtype=np.int).flatten()
        dL_dKdiag_small = np.zeros(self.num_outputs)
        for i in range(self.num_outputs):
        dL_dKdiag_small = np.zeros(self.output_dim)
        for i in range(self.output_dim):
            dL_dKdiag_small[i] += np.sum(dL_dKdiag[index==i])
        dW = 2.*self.W*dL_dKdiag_small[:,None]
        dkappa = dL_dKdiag_small
GPy/kern/parts/eq_ode1.py (new file)
@@ -0,0 +1,556 @@
# Copyright (c) 2013, GPy Authors, see AUTHORS.txt
# Licensed under the BSD 3-clause license (see LICENSE.txt)

from kernpart import Kernpart
import numpy as np
from GPy.util.linalg import mdot, pdinv
from GPy.util.ln_diff_erfs import ln_diff_erfs
import pdb
from scipy import weave

class Eq_ode1(Kernpart):
    """
    Covariance function for first order differential equation driven by an exponentiated quadratic covariance.

    The outputs of this kernel have the form

    .. math::

        \frac{\text{d}y_j}{\text{d}t} = \sum_{i=1}^R w_{j,i} f_i(t-\delta_j) + \sqrt{\kappa_j}g_j(t) - d_jy_j(t)

    where :math:`R` is the rank of the system, :math:`w_{j,i}` is the sensitivity of the :math:`j`th output to the :math:`i`th latent function, :math:`d_j` is the decay rate of the :math:`j`th output and :math:`f_i(t)` and :math:`g_i(t)` are independent latent Gaussian processes governed by an exponentiated quadratic covariance.

    :param output_dim: number of outputs driven by latent function.
    :type output_dim: int
    :param W: sensitivities of each output to the latent driving function.
    :type W: ndarray (output_dim x rank).
    :param rank: If rank is greater than 1 then there are assumed to be a total of rank latent forces independently driving the system, each with identical covariance.
    :type rank: int
    :param decay: decay rates for the first order system.
    :type decay: array of length output_dim.
    :param delay: delay between latent force and output response.
    :type delay: array of length output_dim.
    :param kappa: diagonal term that allows each latent output to have an independent component to the response.
    :type kappa: array of length output_dim.

    .. note:: see first order differential equation examples in GPy.examples.regression for some usage.
    """
    def __init__(self,output_dim, W=None, rank=1, kappa=None, lengthscale=1.0, decay=None, delay=None):
        self.rank = rank
        self.input_dim = 1
        self.name = 'eq_ode1'
        self.output_dim = output_dim
        self.lengthscale = lengthscale
        self.num_params = self.output_dim*self.rank + 1 + (self.output_dim - 1)
        if kappa is not None:
            self.num_params+=self.output_dim
        if delay is not None:
            assert delay.shape==(self.output_dim-1,)
            self.num_params+=self.output_dim-1
        self.rank = rank
        if W is None:
            self.W = 0.5*np.random.randn(self.output_dim,self.rank)/np.sqrt(self.rank)
        else:
            assert W.shape==(self.output_dim,self.rank)
            self.W = W
        if decay is None:
            self.decay = np.ones(self.output_dim-1)
        if kappa is not None:
            assert kappa.shape==(self.output_dim,)
        self.kappa = kappa

        self.delay = delay
        self.is_normalized = True
        self.is_stationary = False
        self.gaussian_initial = False
        self._set_params(self._get_params())

    def _get_params(self):
        param_list = [self.W.flatten()]
        if self.kappa is not None:
            param_list.append(self.kappa)
        param_list.append(self.decay)
        if self.delay is not None:
            param_list.append(self.delay)
        param_list.append(self.lengthscale)
        return np.hstack(param_list)

    def _set_params(self,x):
        assert x.size == self.num_params
        end = self.output_dim*self.rank
        self.W = x[:end].reshape(self.output_dim,self.rank)
        start = end
        self.B = np.dot(self.W,self.W.T)
        if self.kappa is not None:
            end+=self.output_dim
            self.kappa = x[start:end]
            self.B += np.diag(self.kappa)
            start=end
        end+=self.output_dim-1
        self.decay = x[start:end]
        start=end
        if self.delay is not None:
            end+=self.output_dim-1
            self.delay = x[start:end]
            start=end
        end+=1
        self.lengthscale = x[start]
        self.sigma = np.sqrt(2)*self.lengthscale


    def _get_param_names(self):
        param_names = sum([['W%i_%i'%(i,j) for j in range(self.rank)] for i in range(self.output_dim)],[])
        if self.kappa is not None:
            param_names += ['kappa_%i'%i for i in range(self.output_dim)]
        param_names += ['decay_%i'%i for i in range(1,self.output_dim)]
        if self.delay is not None:
            param_names += ['delay_%i'%i for i in 1+range(1,self.output_dim)]
        param_names+= ['lengthscale']
        return param_names
    def K(self,X,X2,target):

        if X.shape[1] > 2:
            raise ValueError('Input matrix for ode1 covariance should have at most two columns, one containing times, the other output indices')

        self._K_computations(X, X2)
        target += self._scale*self._K_dvar

        if self.gaussian_initial:
            # Add covariance associated with initial condition.
            t1_mat = self._t[self._rorder, None]
            t2_mat = self._t2[None, self._rorder2]
            target+=self.initial_variance * np.exp(- self.decay * (t1_mat + t2_mat))

    def Kdiag(self,index,target):
        #target += np.diag(self.B)[np.asarray(index,dtype=np.int).flatten()]
        pass

    def dK_dtheta(self,dL_dK,X,X2,target):

        # First extract times and indices.
        self._extract_t_indices(X, X2, dL_dK=dL_dK)
        self._dK_ode_dtheta(target)


    def _dK_ode_dtheta(self, target):
        """Do all the computations for the ode parts of the covariance function."""
        t_ode = self._t[self._index>0]
        dL_dK_ode = self._dL_dK[self._index>0, :]
        index_ode = self._index[self._index>0]-1
        if self._t2 is None:
            if t_ode.size==0:
                return
            t2_ode = t_ode
            dL_dK_ode = dL_dK_ode[:, self._index>0]
            index2_ode = index_ode
        else:
            t2_ode = self._t2[self._index2>0]
            dL_dK_ode = dL_dK_ode[:, self._index2>0]
            if t_ode.size==0 or t2_ode.size==0:
                return
            index2_ode = self._index2[self._index2>0]-1

        h1 = self._compute_H(t_ode, index_ode, t2_ode, index2_ode, stationary=self.is_stationary, update_derivatives=True)
        #self._dK_ddelay = self._dh_ddelay
        self._dK_dsigma = self._dh_dsigma

        if self._t2 is None:
            h2 = h1
        else:
            h2 = self._compute_H(t2_ode, index2_ode, t_ode, index_ode, stationary=self.is_stationary, update_derivatives=True)

        #self._dK_ddelay += self._dh_ddelay.T
        self._dK_dsigma += self._dh_dsigma.T
        # C1 = self.sensitivity
        # C2 = self.sensitivity

        # K = 0.5 * (h1 + h2.T)
        # var2 = C1*C2
        # if self.is_normalized:
        #     dk_dD1 = (sum(sum(dL_dK.*dh1_dD1)) + sum(sum(dL_dK.*dh2_dD1.T)))*0.5*var2
        #     dk_dD2 = (sum(sum(dL_dK.*dh1_dD2)) + sum(sum(dL_dK.*dh2_dD2.T)))*0.5*var2
        #     dk_dsigma = 0.5 * var2 * sum(sum(dL_dK.*dK_dsigma))
        #     dk_dC1 = C2 * sum(sum(dL_dK.*K))
        #     dk_dC2 = C1 * sum(sum(dL_dK.*K))
        # else:
        #     K = np.sqrt(np.pi) * K
        #     dk_dD1 = (sum(sum(dL_dK.*dh1_dD1)) + * sum(sum(dL_dK.*K))
        #     dk_dC2 = self.sigma * C1 * sum(sum(dL_dK.*K))


        # dk_dSim1Variance = dk_dC1
        # Last element is the length scale.
        (dL_dK_ode[:, :, None]*self._dh_ddelay[:, None, :]).sum(2)

        target[-1] += (dL_dK_ode*self._dK_dsigma/np.sqrt(2)).sum()


        # # only pass the gradient with respect to the inverse width to one
        # # of the gradient vectors ... otherwise it is counted twice.
        # g1 = real([dk_dD1 dk_dinvWidth dk_dSim1Variance])
        # g2 = real([dk_dD2 0 dk_dSim2Variance])
        # return g1, g2"""

    def dKdiag_dtheta(self,dL_dKdiag,index,target):
        pass

    def dK_dX(self,dL_dK,X,X2,target):
        pass
    def _extract_t_indices(self, X, X2=None, dL_dK=None):
        """Extract times and output indices from the input matrix X. Times are ordered according to their index for convenience of computation, this ordering is stored in self._order and self._order2. These orderings are then mapped back to the original ordering (in X) using self._rorder and self._rorder2."""

        # TODO: some fast checking here to see if this needs recomputing?
        self._t = X[:, 0]
        if not X.shape[1] == 2:
            raise ValueError('Input matrix for ode1 covariance should have two columns, one containing times, the other output indices')
        self._index = np.asarray(X[:, 1],dtype=np.int)
        # Sort indices so that outputs are in blocks for computational
        # convenience.
        self._order = self._index.argsort()
        self._index = self._index[self._order]
        self._t = self._t[self._order]
        self._rorder = self._order.argsort() # rorder is for reversing the order

        if X2 is None:
            self._t2 = None
            self._index2 = None
            self._order2 = self._order
            self._rorder2 = self._rorder
        else:
            if not X2.shape[1] == 2:
                raise ValueError('Input matrix for ode1 covariance should have two columns, one containing times, the other output indices')
            self._t2 = X2[:, 0]
            self._index2 = np.asarray(X2[:, 1],dtype=np.int)
            self._order2 = self._index2.argsort()
            self._index2 = self._index2[self._order2]
            self._t2 = self._t2[self._order2]
            self._rorder2 = self._order2.argsort() # rorder2 is for reversing order

        if dL_dK is not None:
            self._dL_dK = dL_dK[self._order, :]
            self._dL_dK = self._dL_dK[:, self._order2]
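The order/rorder bookkeeping above rests on a standard identity: the argsort of an argsort is the inverse permutation, so sorted values can be restored to their original order. A two-line check (illustration, not from the diff):

import numpy as np

idx = np.array([2, 0, 1, 0])
order = idx.argsort()
rorder = order.argsort()                  # inverse of `order`
assert np.all(idx[order][rorder] == idx)  # sort, then unsort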
    def _K_computations(self, X, X2):
        """Perform main body of computations for the ode1 covariance function."""
        # First extract times and indices.
        self._extract_t_indices(X, X2)

        self._K_compute_eq()
        self._K_compute_ode_eq()
        if X2 is None:
            self._K_eq_ode = self._K_ode_eq.T
        else:
            self._K_compute_ode_eq(transpose=True)
        self._K_compute_ode()

        if X2 is None:
            self._K_dvar = np.zeros((self._t.shape[0], self._t.shape[0]))
        else:
            self._K_dvar = np.zeros((self._t.shape[0], self._t2.shape[0]))

        # Reorder values of blocks for placing back into _K_dvar.
        self._K_dvar = np.vstack((np.hstack((self._K_eq, self._K_eq_ode)),
                                  np.hstack((self._K_ode_eq, self._K_ode))))
        self._K_dvar = self._K_dvar[self._rorder, :]
        self._K_dvar = self._K_dvar[:, self._rorder2]


        if X2 is None:
            # Matrix giving scales of each output
            self._scale = np.zeros((self._t.size, self._t.size))
            code="""
            for(int i=0;i<N; i++){
                scale_mat[i+i*N] = B[index[i]+output_dim*(index[i])];
                for(int j=0; j<i; j++){
                    scale_mat[j+i*N] = B[index[i]+output_dim*index[j]];
                    scale_mat[i+j*N] = scale_mat[j+i*N];
                }
            }
            """
            scale_mat, B, index = self._scale, self.B, self._index
            N, output_dim = self._t.size, self.output_dim
            weave.inline(code,['index',
                               'scale_mat', 'B',
                               'N', 'output_dim'])
        else:
            self._scale = np.zeros((self._t.size, self._t2.size))
            code = """
            for(int i=0; i<N; i++){
                for(int j=0; j<N2; j++){
                    scale_mat[i+j*N] = B[index[i]+output_dim*index2[j]];
                }
            }
            """
            scale_mat, B, index, index2 = self._scale, self.B, self._index, self._index2
            N, N2, output_dim = self._t.size, self._t2.size, self.output_dim
            weave.inline(code, ['index', 'index2',
                                'scale_mat', 'B',
                                'N', 'N2', 'output_dim'])
    def _K_compute_eq(self):
        """Compute covariance for the latent process."""
        t_eq = self._t[self._index==0]
        if self._t2 is None:
            if t_eq.size==0:
                self._K_eq = np.zeros((0, 0))
                return
            self._dist2 = np.square(t_eq[:, None] - t_eq[None, :])
        else:
            t2_eq = self._t2[self._index2==0]
            if t_eq.size==0 or t2_eq.size==0:
                self._K_eq = np.zeros((t_eq.size, t2_eq.size))
                return
            self._dist2 = np.square(t_eq[:, None] - t2_eq[None, :])

        self._K_eq = np.exp(-self._dist2/(2*self.lengthscale*self.lengthscale))
        if self.is_normalized:
            self._K_eq/=(np.sqrt(2*np.pi)*self.lengthscale)
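A standalone restatement (sketch, not from the diff) of the latent-process block computed above: an exponentiated quadratic, optionally normalised so it matches a Gaussian density in scale:

import numpy as np

def K_eq(t, t2, lengthscale, normalized=True):
    d2 = np.square(t[:, None] - t2[None, :])
    K = np.exp(-d2 / (2. * lengthscale**2))
    return K / (np.sqrt(2 * np.pi) * lengthscale) if normalized else K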
    def _K_compute_ode_eq(self, transpose=False):
        """Compute the cross covariances between latent exponentiated quadratic and observed ordinary differential equations.

        :param transpose: if set to false the exponentiated quadratic is on the rows of the matrix and is computed according to self._t, if set to true it is on the columns and is computed according to self._t2 (default=False).
        :type transpose: bool"""

        if self._t2 is not None:
            if transpose:
                t_eq = self._t[self._index==0]
                t_ode = self._t2[self._index2>0]
                index_ode = self._index2[self._index2>0]-1
            else:
                t_eq = self._t2[self._index2==0]
                t_ode = self._t[self._index>0]
                index_ode = self._index[self._index>0]-1
        else:
            t_eq = self._t[self._index==0]
            t_ode = self._t[self._index>0]
            index_ode = self._index[self._index>0]-1

        if t_ode.size==0 or t_eq.size==0:
            if transpose:
                self._K_eq_ode = np.zeros((t_eq.shape[0], t_ode.shape[0]))
            else:
                self._K_ode_eq = np.zeros((t_ode.shape[0], t_eq.shape[0]))
            return

        t_ode_mat = t_ode[:, None]
        t_eq_mat = t_eq[None, :]
        if self.delay is not None:
            t_ode_mat -= self.delay[index_ode, None]
        diff_t = (t_ode_mat - t_eq_mat)

        inv_sigma_diff_t = 1./self.sigma*diff_t
        decay_vals = self.decay[index_ode][:, None]
        half_sigma_d_i = 0.5*self.sigma*decay_vals

        if self.is_stationary:
            ln_part, signs = ln_diff_erfs(np.inf, half_sigma_d_i - inv_sigma_diff_t, return_sign=True)
        else:
            ln_part, signs = ln_diff_erfs(half_sigma_d_i + t_eq_mat/self.sigma, half_sigma_d_i - inv_sigma_diff_t, return_sign=True)
        sK = signs*np.exp(half_sigma_d_i*half_sigma_d_i - decay_vals*diff_t + ln_part)

        sK *= 0.5

        if not self.is_normalized:
            sK *= np.sqrt(np.pi)*self.sigma


        if transpose:
            self._K_eq_ode = sK.T
        else:
            self._K_ode_eq = sK
    def _K_compute_ode(self):
        """Compute the covariances between the outputs of the ODE models."""
        t_ode = self._t[self._index>0]
        index_ode = self._index[self._index>0]-1
        if self._t2 is None:
            if t_ode.size==0:
                self._K_ode = np.zeros((0, 0))
                return
            t2_ode = t_ode
            index2_ode = index_ode
        else:
            t2_ode = self._t2[self._index2>0]
            if t_ode.size==0 or t2_ode.size==0:
                self._K_ode = np.zeros((t_ode.size, t2_ode.size))
                return
            index2_ode = self._index2[self._index2>0]-1

        h = self._compute_H(t_ode, index_ode, t2_ode, index2_ode, stationary=self.is_stationary)

        if self._t2 is None:
            # When the two inputs are identical, symmetrize h directly.
            self._K_ode = 0.5 * (h + h.T)
        else:
            h2 = self._compute_H(t2_ode, index2_ode, t_ode, index_ode, stationary=self.is_stationary)
            self._K_ode = 0.5 * (h + h2.T)

        if not self.is_normalized:
            self._K_ode *= np.sqrt(np.pi)*self.sigma

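    # For reference: h alone is not symmetric in its two time arguments, so
    # the covariance between ODE outputs is assembled as the average of the
    # two orderings, 0.5*(h(t, t') + h(t', t).T); when a second input block
    # is given the transposed half is recomputed with the arguments swapped.
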
    def _compute_diag_H(self, t, index, update_derivatives=False, stationary=False):
        """Helper function for computing H for the diagonal only.

        :param t: time input.
        :type t: array
        :param index: output indices.
        :type index: array of int
        :param update_derivatives: whether or not to update the derivative portions (default False).
        :type update_derivatives: bool
        :param stationary: whether to compute the stationary version of the covariance (default False).
        :type stationary: bool"""

        # TODO: not yet implemented. The block below is an unfinished port
        # of the MATLAB implementation, kept for reference; D_j stands for
        # the decay of the second output and simComputeH corresponds to
        # self._compute_H.
        """if delta_i != delta_j:
            [h, dh_dD_i, dh_dD_j, dh_dsigma] = np.diag(simComputeH(t, index, t, index, update_derivatives=True, stationary=self.is_stationary))
        else:
            Decay = self.decay[index]
            if self.delay is not None:
                t = t - self.delay[index]

            t_squared = t*t
            half_sigma_decay = 0.5*self.sigma*Decay
            [ln_part_1, sign1] = ln_diff_erfs(half_sigma_decay + t/self.sigma,
                                              half_sigma_decay)

            [ln_part_2, sign2] = ln_diff_erfs(half_sigma_decay,
                                              half_sigma_decay - t/self.sigma)

            h = (sign1*np.exp(half_sigma_decay*half_sigma_decay
                              + ln_part_1
                              - np.log(Decay + D_j))
                 - sign2*np.exp(half_sigma_decay*half_sigma_decay
                                - (Decay + D_j)*t
                                + ln_part_2
                                - np.log(Decay + D_j)))

            sigma2 = self.sigma*self.sigma

            if update_derivatives:

                dh_dD_i = ((0.5*Decay*sigma2*(Decay + D_j)-1)*h
                           + t*sign2*np.exp(
                               half_sigma_decay*half_sigma_decay-(Decay+D_j)*t + ln_part_2
                           )
                           + self.sigma/np.sqrt(np.pi)*
                           (-1 + np.exp(-t_squared/sigma2-Decay*t)
                            + np.exp(-t_squared/sigma2-D_j*t)
                            - np.exp(-(Decay + D_j)*t)))

                dh_dD_i = (dh_dD_i/(Decay+D_j)).real

                dh_dD_j = (t*sign2*np.exp(
                    half_sigma_decay*half_sigma_decay-(Decay + D_j)*t+ln_part_2
                )
                           - h)
                dh_dD_j = (dh_dD_j/(Decay + D_j)).real

                dh_dsigma = 0.5*Decay*Decay*self.sigma*h \
                    + 2/(np.sqrt(np.pi)*(Decay+D_j))\
                    *((-Decay/2) \
                      + (-t/sigma2+Decay/2)*np.exp(-t_squared/sigma2 - Decay*t) \
                      - (-t/sigma2-Decay/2)*np.exp(-t_squared/sigma2 - D_j*t) \
                      - Decay/2*np.exp(-(Decay+D_j)*t))"""
        pass

    def _compute_H(self, t, index, t2, index2, update_derivatives=False, stationary=False):
        """Helper function for computing part of the ode1 covariance function.

        :param t: first time input.
        :type t: array
        :param index: indices of the first output.
        :type index: array of int
        :param t2: second time input.
        :type t2: array
        :param index2: indices of the second output.
        :type index2: array of int
        :param update_derivatives: whether to update derivatives (default is False).
        :type update_derivatives: bool
        :returns: h, the result of this subcomponent of the kernel for the given values.
        :rtype: ndarray
        """

        if stationary:
            raise NotImplementedError("Stationary version of this covariance not yet implemented.")
        # Vectors of decays and delays associated with each output.
        Decay = self.decay[index]
        Decay2 = self.decay[index2]
        t_mat = t[:, None]
        t2_mat = t2[None, :]
        if self.delay is not None:
            Delay = self.delay[index]
            Delay2 = self.delay[index2]
            # Avoid in-place subtraction: t[:, None] is a view onto the
            # caller's array.
            t_mat = t_mat - Delay[:, None]
            t2_mat = t2_mat - Delay2[None, :]

        diff_t = (t_mat - t2_mat)
        inv_sigma_diff_t = 1./self.sigma*diff_t
        half_sigma_decay_i = 0.5*self.sigma*Decay[:, None]

        ln_part_1, sign1 = ln_diff_erfs(half_sigma_decay_i + t2_mat/self.sigma,
                                        half_sigma_decay_i - inv_sigma_diff_t,
                                        return_sign=True)
        ln_part_2, sign2 = ln_diff_erfs(half_sigma_decay_i,
                                        half_sigma_decay_i - t_mat/self.sigma,
                                        return_sign=True)

        h = sign1*np.exp(half_sigma_decay_i
                         *half_sigma_decay_i
                         - Decay[:, None]*diff_t + ln_part_1
                         - np.log(Decay[:, None] + Decay2[None, :]))
        h -= sign2*np.exp(half_sigma_decay_i*half_sigma_decay_i
                          - Decay[:, None]*t_mat - Decay2[None, :]*t2_mat + ln_part_2
                          - np.log(Decay[:, None] + Decay2[None, :]))

        if update_derivatives:
            sigma2 = self.sigma*self.sigma
            # Update the gradient with respect to the ith decay.
            dh_ddecay = ((0.5*Decay[:, None]*sigma2*(Decay[:, None] + Decay2[None, :])-1)*h
                         + (-diff_t*sign1*np.exp(
                             half_sigma_decay_i*half_sigma_decay_i-Decay[:, None]*diff_t+ln_part_1
                         )
                            + t_mat*sign2*np.exp(
                                half_sigma_decay_i*half_sigma_decay_i-Decay[:, None]*t_mat
                                - Decay2[None, :]*t2_mat+ln_part_2))
                         + self.sigma/np.sqrt(np.pi)*(
                             -np.exp(
                                 -diff_t*diff_t/sigma2
                             )+np.exp(
                                 -t2_mat*t2_mat/sigma2-Decay[:, None]*t_mat
                             )+np.exp(
                                 -t_mat*t_mat/sigma2-Decay2[None, :]*t2_mat
                             )-np.exp(
                                 -(Decay[:, None]*t_mat + Decay2[None, :]*t2_mat)
                             )
                         ))
            self._dh_ddecay = (dh_ddecay/(Decay[:, None]+Decay2[None, :])).real

            # Update the gradient with respect to the jth decay.
            dh_ddecay2 = (t2_mat*sign2
                          *np.exp(
                              half_sigma_decay_i*half_sigma_decay_i
                              - (Decay[:, None]*t_mat + Decay2[None, :]*t2_mat)
                              + ln_part_2
                          )
                          - h)
            self._dh_ddecay2 = (dh_ddecay2/(Decay[:, None] + Decay2[None, :])).real

            # Update the sigma gradient.
            self._dh_dsigma = (half_sigma_decay_i*Decay[:, None]*h
                               + 2/(np.sqrt(np.pi)
                                    *(Decay[:, None]+Decay2[None, :]))
                               *((-diff_t/sigma2-Decay[:, None]/2)
                                 *np.exp(-diff_t*diff_t/sigma2)
                                 + (-t2_mat/sigma2+Decay[:, None]/2)
                                 *np.exp(-t2_mat*t2_mat/sigma2-Decay[:, None]*t_mat)
                                 - (-t_mat/sigma2-Decay[:, None]/2)
                                 *np.exp(-t_mat*t_mat/sigma2-Decay2[None, :]*t2_mat)
                                 - Decay[:, None]/2
                                 *np.exp(-(Decay[:, None]*t_mat+Decay2[None, :]*t2_mat))))

        return h

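Taken together, the three _K_compute_* helpers above fill in the blocks of the joint covariance over latent points (index 0) and ODE outputs (index > 0). A minimal sketch of how the cached blocks combine, assuming a kernel object k on which the helpers have already been run for the symmetric case (self._t2 is None), and assuming the latent points are listed first:

import numpy as np

def full_covariance(k):
    # Joint covariance from the cached blocks: latent block on the top
    # left, ODE output block on the bottom right, cross block in between.
    top = np.hstack((k._K_eq, k._K_ode_eq.T))
    bottom = np.hstack((k._K_ode_eq, k._K_ode))
    return np.vstack((top, bottom))
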
@@ -10,9 +10,12 @@ import GPy

 class Hetero(Kernpart):
     """
-    TODO: Need to constrain the function outputs positive (still thinking of best way of doing this!!! Yes, intend to use transformations, but what's the *best* way). Currently just squaring output.
+    TODO: Need to constrain the function outputs
+    positive (still thinking of the best way of doing this!!! Yes, intend to use
+    transformations, but what's the *best* way). Currently just squaring output.

-    Heteroschedastic noise which depends on input location. See, for example, this paper by Goldberg et al.
+    Heteroscedastic noise which depends on input location. See, for example,
+    this paper by Goldberg et al.

     .. math::

@@ -20,15 +23,15 @@ class Hetero(Kernpart):

     where :math:`\sigma^2(x)` is a function giving the variance as a function of input space and :math:`\delta_{i,j}` is the Kronecker delta function.

     The parameters are the parameters of :math:`\sigma^2(x)`, which is a
     function that can be specified by the user; by default a
     multi-layer perceptron is used.

     :param input_dim: the number of input dimensions
     :type input_dim: int
     :param mapping: the mapping that gives the lengthscale across the input space (by default GPy.mappings.MLP is used with 20 hidden nodes).
     :type mapping: GPy.core.Mapping
     :rtype: Kernpart object

     See this paper:

@@ -36,7 +39,7 @@ class Hetero(Kernpart):
     C. M. (1998) Regression with Input-dependent Noise: a Gaussian
     Process Treatment. In Advances in Neural Information Processing
     Systems, Volume 10, pp. 493-499. MIT Press,

     for a Gaussian process treatment of this problem.

     """

@@ -47,7 +50,7 @@ class Hetero(Kernpart):
         mapping = GPy.mappings.MLP(output_dim=1, hidden_dim=20, input_dim=input_dim)
         if not transform:
             transform = GPy.core.transformations.logexp()

         self.transform = transform
         self.mapping = mapping
         self.name='hetero'

@@ -66,7 +69,7 @@ class Hetero(Kernpart):

     def K(self, X, X2, target):
         """Return covariance between X and X2."""
-        if X2==None or X2 is X:
+        if (X2 is None) or (X2 is X):
             target[np.diag_indices_from(target)] += self._Kdiag(X)

     def Kdiag(self, X, target):

@@ -76,26 +79,26 @@ class Hetero(Kernpart):
     def _Kdiag(self, X):
         """Helper function for computing the diagonal elements of the covariance."""
         return self.mapping.f(X).flatten()**2

     def dK_dtheta(self, dL_dK, X, X2, target):
         """Derivative of the covariance with respect to the parameters."""
-        if X2==None or X2 is X:
+        if (X2 is None) or (X2 is X):
             dL_dKdiag = dL_dK.flat[::dL_dK.shape[0]+1]
             self.dKdiag_dtheta(dL_dKdiag, X, target)

     def dKdiag_dtheta(self, dL_dKdiag, X, target):
         """Gradient of diagonal of covariance with respect to parameters."""
-        target += 2.*self.mapping.df_dtheta(dL_dKdiag[:, None], X)*self.mapping.f(X)
+        target += 2.*self.mapping.df_dtheta(dL_dKdiag[:, None]*self.mapping.f(X), X)

     def dK_dX(self, dL_dK, X, X2, target):
         """Derivative of the covariance matrix with respect to X."""
         if X2==None or X2 is X:
             dL_dKdiag = dL_dK.flat[::dL_dK.shape[0]+1]
             self.dKdiag_dX(dL_dKdiag, X, target)

     def dKdiag_dX(self, dL_dKdiag, X, target):
         """Gradient of diagonal of covariance with respect to X."""
         target += 2.*self.mapping.df_dX(dL_dKdiag[:, None], X)*self.mapping.f(X)

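The squaring of the mapping output is what keeps the noise variances positive. A minimal standalone sketch of the diagonal update performed by K above, using a hypothetical stand-in for GPy.mappings.MLP (any object exposing f(X) works):

import numpy as np

class ToyMapping(object):
    """Hypothetical stand-in for GPy.mappings.MLP: just needs f(X)."""
    def f(self, X):
        return X.sum(axis=1)[:, None]

mapping = ToyMapping()
X = np.random.randn(5, 2)
noise_var = mapping.f(X).flatten()**2     # squaring keeps the variances positive
K = np.zeros((5, 5))
K[np.diag_indices_from(K)] += noise_var   # the update Hetero.K applies when X2 is X
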
@@ -58,6 +58,8 @@ class Kernpart(object):
         raise NotImplementedError
     def dK_dX(self, dL_dK, X, X2, target):
         raise NotImplementedError
+    def dKdiag_dX(self, dL_dK, X, target):
+        raise NotImplementedError

@@ -97,6 +99,9 @@ class Kernpart_stationary(Kernpart):
         # wrt lengthscale is 0.
         target[0] += np.sum(dL_dKdiag)

+    def dKdiag_dX(self, dL_dK, X, target):
+        pass # true for all stationary kernels: the diagonal is constant in X
+
 class Kernpart_inner(Kernpart):
     def __init__(self,input_dim):

@@ -7,11 +7,13 @@ four_over_tau = 2./np.pi

 class MLP(Kernpart):
     """
-    multi layer perceptron kernel (also known as arc sine kernel or neural network kernel)
+    Multi layer perceptron kernel (also known as arc sine kernel or neural network kernel)

     .. math::

-          k(x,y) = \sigma^2 \frac{2}{\pi} \text{asin} \left(\frac{\sigma_w^2 x^\top y+\sigma_b^2}{\sqrt{\sigma_w^2x^\top x + \sigma_b^2 + 1}\sqrt{\sigma_w^2 y^\top y \sigma_b^2 +1}} \right)
+          k(x,y) = \\sigma^{2}\\frac{2}{\\pi}\\text{asin}\\left(\\frac{\\sigma_w^2 x^\\top y + \\sigma_b^2}{\\sqrt{\\sigma_w^2 x^\\top x + \\sigma_b^2 + 1}\\sqrt{\\sigma_w^2 y^\\top y + \\sigma_b^2 + 1}}\\right)

     :param input_dim: the number of input dimensions
     :type input_dim: int

@@ -24,6 +26,7 @@ class MLP(Kernpart):
     :type ARD: Boolean
     :rtype: Kernpart object

     """

     def __init__(self, input_dim, variance=1., weight_variance=None, bias_variance=100., ARD=False):

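A minimal NumPy sketch of the arc sine formula above (standalone, not the GPy implementation; variance, weight_variance and bias_variance are assumed scalar here):

import numpy as np

def mlp_kern(x, y, variance=1., weight_variance=1., bias_variance=100.):
    # k(x, y) = sigma^2 * (2/pi) * asin(numerator/denominator); the +1 terms
    # in the denominator keep the arcsin argument inside [-1, 1].
    num = weight_variance*np.dot(x, y) + bias_variance
    den = np.sqrt(weight_variance*np.dot(x, x) + bias_variance + 1.) \
        * np.sqrt(weight_variance*np.dot(y, y) + bias_variance + 1.)
    return variance*(2./np.pi)*np.arcsin(num/den)

x = np.array([0.5, -1.0])
y = np.array([0.2, 0.3])
print(mlp_kern(x, y))
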
38  GPy/kern/parts/odekern1.c  Normal file

@@ -0,0 +1,38 @@
#include <math.h>

/* Covariance between two points of the latent force. Parameter types and
   statement-terminating semicolons were missing in the draft and are added
   here. TODO: dist = sqrt(t2*t2 - t1*t1) looks suspect: for a stationary
   kernel one would expect the absolute difference fabs(t1 - t2). */
double k_uu(double t1, double t2, double theta1, double theta2,
            double sig1, double sig2)
{
    double kern = 0;
    double dist = 0;

    dist = sqrt(t2*t2 - t1*t1);

    kern = sig1*(1 + theta1*dist)*exp(-theta1*dist);

    return kern;
}

/* Covariance between two output points of the ODE. */
double k_yy(double t1, double t2, double theta1, double theta2,
            double sig1, double sig2)
{
    double kern = 0;
    double dist = 0;

    dist = sqrt(t2*t2 - t1*t1);

    kern = sig1*sig2*(exp(-theta1*dist)*(theta2 - 2*theta1 + theta1*theta2*dist - theta1*theta1*dist)
                      + exp(-dist))/((theta2 - theta1)*(theta2 - theta1));

    return kern;
}

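For experimenting with the draft formulas outside of C, a direct Python transliteration of k_uu, keeping the draft's distance expression (suspect, as the TODO above notes):

import numpy as np

def k_uu(t1, t2, theta1, sig1):
    # Port of the C draft; np.sqrt(t2*t2 - t1*t1) yields nan when t1 > t2,
    # which is why fabs(t1 - t2) looks like the intended distance.
    dist = np.sqrt(t2*t2 - t1*t1)
    return sig1*(1 + theta1*dist)*np.exp(-theta1*dist)

print(k_uu(0.5, 1.0, 2.0, 1.0))
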
@@ -7,22 +7,22 @@ four_over_tau = 2./np.pi

 class POLY(Kernpart):
     """
-    polynomial kernel parameter initialisation. Included for completeness, but generally not recommended, is the polynomial kernel,
-    .. math::
-
-          k(x, y) = \sigma^2*(\sigma_w^2 x'y+\sigma_b^b)^d
-
-    The kernel parameters are \sigma^2 (variance), \sigma^2_w
-    (weight_variance), \sigma^2_b (bias_variance) and d
+    Polynomial kernel parameter initialisation. Included for completeness, but generally not recommended, is the polynomial kernel:
+
+    .. math::
+          k(x, y) = \sigma^2\*(\sigma_w^2 x'y+\sigma_b^2)^d
+
+    The kernel parameters are :math:`\sigma^2` (variance), :math:`\sigma^2_w`
+    (weight_variance), :math:`\sigma^2_b` (bias_variance) and d
     (degree). Only gradients of the first three are provided for
     kernel optimisation, it is assumed that polynomial degree would
     be set by hand.

     The kernel is not recommended as it is badly behaved when the
-    \sigma^2_w*x'*y + \sigma^2_b has a magnitude greater than one. For completeness
+    :math:`\sigma^2_w\*x'\*y + \sigma^2_b` has a magnitude greater than one. For completeness
     there is an automatic relevance determination version of this
-    kernel provided.
+    kernel provided (NOT YET IMPLEMENTED!).

     :param input_dim: the number of input dimensions
     :type input_dim: int
     :param variance: the variance :math:`\sigma^2`

@@ -32,7 +32,7 @@ class POLY(Kernpart):
     :param bias_variance: the variance of the prior over bias parameters :math:`\sigma^2_b`
     :param degree: the degree of the polynomial.
     :type degree: int
-    :param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one weight variance parameter \sigma^2_w), otherwise there is one weight variance parameter per dimension.
+    :param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (i.e. one weight variance parameter :math:`\sigma^2_w`), otherwise there is one weight variance parameter per dimension.
     :type ARD: Boolean
     :rtype: Kernpart object

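A standalone NumPy sketch of the polynomial form above (not the GPy class; scalar parameters assumed):

import numpy as np

def poly_kern(x, y, variance=1., weight_variance=1., bias_variance=1., degree=2):
    # k(x, y) = sigma^2 * (sigma_w^2 x'y + sigma_b^2)^d
    return variance*(weight_variance*np.dot(x, y) + bias_variance)**degree

x = np.array([0.1, 0.2])
y = np.array([0.3, -0.1])
print(poly_kern(x, y))  # small inputs keep the base term's magnitude near one
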
@@ -1,6 +1,7 @@
 #include <math.h>
 double DiracDelta(double x){
-    if((x<0.000001) & (x>-0.000001))//go on, laught at my c++ skills
+    // TODO: this doesn't seem to be a dirac delta ... should return infinity. Neil
+    if((x<0.000001) & (x>-0.000001))//go on, laugh at my c++ skills
         return 1.0;
     else
         return 0.0;

@@ -8,3 +9,17 @@ double DiracDelta(double x){
 double DiracDelta(double x,int foo){
     return 0.0;
 };

+double sinc(double x){
+    if (x==0)
+        return 1.0;
+    else
+        return sin(x)/x;
+}
+
+double sinc_grad(double x){
+    if (x==0)
+        return 0.0;
+    else
+        return (x*cos(x) - sin(x))/(x*x);
+}

@@ -1,3 +1,6 @@
 #include <math.h>
 double DiracDelta(double x);
 double DiracDelta(double x, int foo);
+
+double sinc(double x);
+double sinc_grad(double x);

@@ -26,8 +26,11 @@ class spkern(Kernpart):
     - to handle multiple inputs, call them x1, z1, etc
     - to handle multiple correlated outputs, you'll need to define each covariance function and 'cross' variance function. TODO
     """
-    def __init__(self,input_dim,k,param=None):
-        self.name='sympykern'
+    def __init__(self,input_dim,k,name=None,param=None):
+        if name is None:
+            self.name='sympykern'
+        else:
+            self.name = name
         self._sp_k = k
         sp_vars = [e for e in k.atoms() if e.is_Symbol]
         self._sp_x= sorted([e for e in sp_vars if e.name[0]=='x'],key=lambda x:int(x.name[1:]))

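Given the constructor above, a hypothetical construction sketch (assuming sympy is available; the x/z symbol naming follows the convention documented in the docstring, and the actual call is left commented out since spkern compiles the expression through weave):

import sympy as sp

x0, z0 = sp.symbols('x0 z0')   # inputs are named x0..., z0... by convention
theta = sp.Symbol('theta')     # any remaining symbol becomes a kernel parameter
k_expr = sp.exp(-theta*(x0 - z0)**2)

# kern = spkern(input_dim=1, k=k_expr, name='my_sympy_rbf')
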
@@ -56,9 +59,9 @@ class spkern(Kernpart):

         self.weave_kwargs = {\
         'support_code':self._function_code,\
-        'include_dirs':[tempfile.gettempdir(), os.path.join(current_dir,'kern/')],\
+        'include_dirs':[tempfile.gettempdir(), os.path.join(current_dir,'parts/')],\
         'headers':['"sympy_helpers.h"'],\
-        'sources':[os.path.join(current_dir,"kern/sympy_helpers.cpp")],\
+        'sources':[os.path.join(current_dir,"parts/sympy_helpers.cpp")],\
         #'extra_compile_args':['-ftree-vectorize', '-mssse3', '-ftree-vectorizer-verbose=5'],\
         'extra_compile_args':[],\
         'extra_link_args':['-lgomp'],\

@@ -109,14 +112,15 @@ class spkern(Kernpart):
             f.write(self._function_header)
             f.close()

-        #get rid of derivatives of DiracDelta
+        # Substitute any known derivatives which sympy doesn't compute
         self._function_code = re.sub('DiracDelta\(.+?,.+?\)','0.0',self._function_code)

-        #Here's some code to do the looping for K
-        arglist = ", ".join(["X[i*input_dim+%s]"%x.name[1:] for x in self._sp_x]\
-                + ["Z[j*input_dim+%s]"%z.name[1:] for z in self._sp_z]\
-                + ["param[%i]"%i for i in range(self.num_params)])
+        # Here's the code to do the looping for K
+        arglist = ", ".join(["X[i*input_dim+%s]"%x.name[1:] for x in self._sp_x]
+                            + ["Z[j*input_dim+%s]"%z.name[1:] for z in self._sp_z]
+                            + ["param[%i]"%i for i in range(self.num_params)])

         self._K_code =\
         """
         int i;

@@ -133,9 +137,14 @@ class spkern(Kernpart):
             %s
         """%(arglist,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed

+        # Similar code when only X is provided.
+        self._K_code_X = self._K_code.replace('Z[', 'X[')
+
+        # Code to compute diagonal of covariance.
         diag_arglist = re.sub('Z','X',arglist)
         diag_arglist = re.sub('j','i',diag_arglist)
-        #Here's some code to do the looping for Kdiag
+        # Code to do the looping for Kdiag
         self._Kdiag_code =\
         """
         int i;

@@ -148,8 +157,9 @@ class spkern(Kernpart):
             %s
         """%(diag_arglist,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed

-        #here's some code to compute gradients
+        # Code to compute gradients
         funclist = '\n'.join([' '*16 + 'target[%i] += partial[i*num_inducing+j]*dk_d%s(%s);'%(i,theta.name,arglist) for i,theta in enumerate(self._sp_theta)])

         self._dK_dtheta_code =\
         """
         int i;

@@ -164,9 +174,12 @@ class spkern(Kernpart):
             }
         }
         %s
-        """%(funclist,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
+        """%(funclist,"/*"+str(self._sp_k)+"*/") # adding a string representation forces recompile when needed

-        #here's some code to compute gradients for Kdiag TODO: this is yucky.
+        # Similar code when only X is provided, change argument lists.
+        self._dK_dtheta_code_X = self._dK_dtheta_code.replace('Z[', 'X[')
+
+        # Code to compute gradients for Kdiag TODO: needs clean up
         diag_funclist = re.sub('Z','X',funclist,count=0)
         diag_funclist = re.sub('j','i',diag_funclist)
         diag_funclist = re.sub('partial\[i\*num_inducing\+i\]','partial[i]',diag_funclist)

@@ -181,8 +194,12 @@ class spkern(Kernpart):
             %s
         """%(diag_funclist,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed

-        #Here's some code to do gradients wrt x
+        # Code for gradients wrt X
         gradient_funcs = "\n".join(["target[i*input_dim+%i] += partial[i*num_inducing+j]*dk_dx%i(%s);"%(q,q,arglist) for q in range(self.input_dim)])
+        if False:
+            # Disabled debug hook: prints when a NaN shows up in the third input dimension.
+            gradient_funcs += """if(isnan(target[i*input_dim+2])){printf("%%f\\n",dk_dx2(X[i*input_dim+0], X[i*input_dim+1], X[i*input_dim+2], Z[j*input_dim+0], Z[j*input_dim+1], Z[j*input_dim+2], param[0], param[1], param[2], param[3], param[4], param[5]));}
+            if(isnan(target[i*input_dim+2])){printf("%%f,%%f,%%i,%%i\\n", X[i*input_dim+2], Z[j*input_dim+2],i,j);}"""

         self._dK_dX_code = \
         """
         int i;

@@ -192,30 +209,34 @@ class spkern(Kernpart):
         int input_dim = X_array->dimensions[1];
         //#pragma omp parallel for private(j)
         for (i=0;i<N; i++){
             for (j=0; j<num_inducing; j++){
                 %s
-                //if(isnan(target[i*input_dim+2])){printf("%%f\\n",dk_dx2(X[i*input_dim+0], X[i*input_dim+1], X[i*input_dim+2], Z[j*input_dim+0], Z[j*input_dim+1], Z[j*input_dim+2], param[0], param[1], param[2], param[3], param[4], param[5]));}
-                //if(isnan(target[i*input_dim+2])){printf("%%f,%%f,%%i,%%i\\n", X[i*input_dim+2], Z[j*input_dim+2],i,j);}
             }
         }
         %s
         """%(gradient_funcs,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed

+        # Create code for call when just X is passed as argument.
+        self._dK_dX_code_X = self._dK_dX_code.replace('Z[', 'X[').replace('+= partial[', '+= 2*partial[')
+
-        #now for gradients of Kdiag wrt X
         diag_gradient_funcs = re.sub('Z','X',gradient_funcs,count=0)
         diag_gradient_funcs = re.sub('j','i',diag_gradient_funcs)
         diag_gradient_funcs = re.sub('partial\[i\*num_inducing\+i\]','2*partial[i]',diag_gradient_funcs)

+        # Code for gradients of Kdiag wrt X
         self._dKdiag_dX_code= \
         """
         int i;
         int j;
         int N = partial_array->dimensions[0];
         int num_inducing = 0;
         int input_dim = X_array->dimensions[1];
-        for (i=0;i<N; i++){
-            j = i;
+        for (int i=0;i<N; i++){
             %s
         }
         %s
-        """%(gradient_funcs,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
+        """%(diag_gradient_funcs,"/*"+str(self._sp_k)+"*/") # adding a
+        # string representation forces recompile when needed. Get rid
+        # of Zs in argument for diagonal. TODO: Why wasn't
+        # diag_funclist called here? Need to check that.
+        #self._dKdiag_dX_code = self._dKdiag_dX_code.replace('Z[j', 'X[i')

         #TODO: insert multiple functions here via string manipulation

@@ -223,7 +244,10 @@ class spkern(Kernpart):

     def K(self,X,Z,target):
         param = self._param
-        weave.inline(self._K_code,arg_names=['target','X','Z','param'],**self.weave_kwargs)
+        if Z is None:
+            weave.inline(self._K_code_X,arg_names=['target','X','param'],**self.weave_kwargs)
+        else:
+            weave.inline(self._K_code,arg_names=['target','X','Z','param'],**self.weave_kwargs)

     def Kdiag(self,X,target):
         param = self._param

@@ -231,21 +255,25 @@ class spkern(Kernpart):

     def dK_dtheta(self,partial,X,Z,target):
         param = self._param
-        weave.inline(self._dK_dtheta_code,arg_names=['target','X','Z','param','partial'],**self.weave_kwargs)
+        if Z is None:
+            weave.inline(self._dK_dtheta_code_X, arg_names=['target','X','param','partial'],**self.weave_kwargs)
+        else:
+            weave.inline(self._dK_dtheta_code, arg_names=['target','X','Z','param','partial'],**self.weave_kwargs)

     def dKdiag_dtheta(self,partial,X,target):
         param = self._param
-        Z = X
-        weave.inline(self._dKdiag_dtheta_code,arg_names=['target','X','Z','param','partial'],**self.weave_kwargs)
+        weave.inline(self._dKdiag_dtheta_code,arg_names=['target','X','param','partial'],**self.weave_kwargs)

     def dK_dX(self,partial,X,Z,target):
         param = self._param
-        weave.inline(self._dK_dX_code,arg_names=['target','X','Z','param','partial'],**self.weave_kwargs)
+        if Z is None:
+            weave.inline(self._dK_dX_code_X,arg_names=['target','X','param','partial'],**self.weave_kwargs)
+        else:
+            weave.inline(self._dK_dX_code,arg_names=['target','X','Z','param','partial'],**self.weave_kwargs)

     def dKdiag_dX(self,partial,X,target):
         param = self._param
-        Z = X
-        weave.inline(self._dKdiag_dX_code,arg_names=['target','X','Z','param','partial'],**self.weave_kwargs)
+        weave.inline(self._dKdiag_dX_code,arg_names=['target','X','param','partial'],**self.weave_kwargs)

     def _set_params(self,param):
         #print param.flags['C_CONTIGUOUS']

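The Z-is-None dispatch above selects the X-only weave code paths, in which the symmetric-case gradients are doubled via the '+= 2*partial[' substitution made earlier. A hypothetical call pattern, assuming a compiled spkern instance kern and data X of shape (N, input_dim):

import numpy as np

N, input_dim = 10, 1
X = np.random.randn(N, input_dim)
target = np.zeros((N, N))
# Symmetric case: pass Z=None so the X-only code is compiled and run.
# kern.K(X, None, target)
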