Mirror of https://github.com/SheffieldML/GPy.git, synced 2026-05-13 22:12:38 +02:00

Commit 603aa18482: Merge branch 'devel' of github.com:SheffieldML/GPy into devel

Conflicts:
	GPy/examples/regression.py
	GPy/kern/constructors.py
	GPy/testing/kernel_tests.py
	GPy/util/multioutput.py

25 changed files with 400 additions and 72 deletions
GPy/kern/constructors.py

@@ -5,7 +5,6 @@ import numpy as np
from kern import kern
import parts


def rbf_inv(input_dim,variance=1., inv_lengthscale=None,ARD=False):
    """
    Construct an RBF kernel

@@ -103,6 +102,12 @@ def gibbs(input_dim,variance=1., mapping=None):
    part = parts.gibbs.Gibbs(input_dim,variance,mapping)
    return kern(input_dim, [part])

def hetero(input_dim, mapping=None, transform=None):
    """
    """
    part = parts.hetero.Hetero(input_dim,mapping,transform)
    return kern(input_dim, [part])

def poly(input_dim,variance=1., weight_variance=None,bias_variance=1.,degree=2, ARD=False):
    """
    Construct a polynomial kernel

@@ -135,6 +140,7 @@ def white(input_dim,variance=1.):
    part = parts.white.White(input_dim,variance)
    return kern(input_dim, [part])


def exponential(input_dim,variance=1., lengthscale=None, ARD=False):
    """
    Construct an exponential kernel
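Aside (not part of the commit): every constructor in this file follows the same pattern, building a single Kernpart and wrapping it in the additive kern container. A minimal sketch of the pattern (the name my_white is hypothetical):

    from kern import kern
    import parts

    def my_white(input_dim, variance=1.):
        """Construct a white noise kernel, following the constructor pattern above."""
        part = parts.white.White(input_dim, variance)
        return kern(input_dim, [part])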
GPy/kern/kern.py

@@ -12,7 +12,9 @@ from matplotlib.transforms import offset_copy
class kern(Parameterized):
    def __init__(self, input_dim, parts=[], input_slices=None):
        """
        This is the main kernel class for GPy. It handles multiple (additive) kernel functions, and keeps track of variaous things like which parameters live where.
        This is the main kernel class for GPy. It handles multiple
        (additive) kernel functions, and keeps track of various things
        like which parameters live where.

        The technical code for kernels is divided into _parts_ (see
        e.g. rbf.py). This object contains a list of parts, which are
@@ -33,6 +35,11 @@ class kern(Parameterized):

        self.input_dim = input_dim

        part_names = [k.name for k in self.parts]
        self.name=''
        for name in part_names:
            self.name += name + '+'
        self.name = self.name[:-1]
        # deal with input_slices
        if input_slices is None:
            self.input_slices = [slice(None) for p in self.parts]
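Aside (not part of the commit): one visible effect of the naming loop added above is that a multi-part kernel reports itself as its part names joined by '+'. A hedged sketch, assuming the constructors from GPy/kern/constructors.py and kern addition:

    import GPy

    k = GPy.kern.rbf(input_dim=2) + GPy.kern.white(input_dim=2)
    print(k.name)  # 'rbf+white', assembled by the loop over part_names above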
@@ -333,10 +340,8 @@ class kern(Parameterized):
        :type X: np.ndarray (num_samples x input_dim)
        :param X2: Observed data inputs (optional, defaults to X)
        :type X2: np.ndarray (num_inducing x input_dim)"""
        if X2 is None:
            X2 = X
        target = np.zeros_like(X)
        if X2 is None:
            [p.dK_dX(dL_dK, X[:, i_s], None, target[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)]
        else:
            [p.dK_dX(dL_dK, X[:, i_s], X2[:, i_s], target[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)]
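Aside (not part of the commit): the reason X2=None is now forwarded to the parts rather than being replaced by X is the symmetric-case chain rule. When the two arguments coincide,

.. math::

    \frac{\partial}{\partial X} K(X, X) = \left.\frac{\partial K(X, X_2)}{\partial X}\right|_{X_2=X} + \left.\frac{\partial K(X_1, X)}{\partial X}\right|_{X_1=X} = 2\left.\frac{\partial K(X, X_2)}{\partial X}\right|_{X_2=X}

since the covariance is symmetric. This is the factor of 2 that appears in the X2 is None branches of the part-level dK_dX implementations throughout the rest of this diff.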
@@ -653,17 +658,85 @@ def kern_test(kern, X=None, X2=None, verbose=False):
    :param X2: X2 input values to test the covariance function.
    :type X2: ndarray
    """
    pass_checks = True
    if X==None:
        X = np.random.randn(10, kern.input_dim)
    if X2==None:
        X2 = np.random.randn(20, kern.input_dim)
    result = [Kern_check_model(kern, X=X).is_positive_definite(),
              Kern_check_dK_dtheta(kern, X=X, X2=X2).checkgrad(verbose=verbose),
              Kern_check_dK_dtheta(kern, X=X, X2=None).checkgrad(verbose=verbose),
              Kern_check_dKdiag_dtheta(kern, X=X).checkgrad(verbose=verbose),
              Kern_check_dK_dX(kern, X=X, X2=X2).checkgrad(verbose=verbose),
              Kern_check_dKdiag_dX(kern, X=X).checkgrad(verbose=verbose)]
    # Need to check
    #Kern_check_dK_dX(kern, X, X2=None).checkgrad(verbose=verbose)]
    # but currently I think these aren't implemented.
    return np.all(result)
    if verbose:
        print("Checking covariance function is positive definite.")
    result = Kern_check_model(kern, X=X).is_positive_definite()
    if result and verbose:
        print("Check passed.")
    if not result:
        print("Positive definite check failed for " + kern.name + " covariance function.")
        pass_checks = False
        return False

    if verbose:
        print("Checking gradients of K(X, X) wrt theta.")
    result = Kern_check_dK_dtheta(kern, X=X, X2=None).checkgrad(verbose=verbose)
    if result and verbose:
        print("Check passed.")
    if not result:
        print("Gradient of K(X, X) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:")
        Kern_check_dK_dtheta(kern, X=X, X2=None).checkgrad(verbose=True)
        pass_checks = False
        return False

    if verbose:
        print("Checking gradients of K(X, X2) wrt theta.")
    result = Kern_check_dK_dtheta(kern, X=X, X2=X2).checkgrad(verbose=verbose)
    if result and verbose:
        print("Check passed.")
    if not result:
        print("Gradient of K(X, X2) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:")
        Kern_check_dK_dtheta(kern, X=X, X2=X2).checkgrad(verbose=True)
        pass_checks = False
        return False

    if verbose:
        print("Checking gradients of Kdiag(X) wrt theta.")
    result = Kern_check_dKdiag_dtheta(kern, X=X).checkgrad(verbose=verbose)
    if result and verbose:
        print("Check passed.")
    if not result:
        print("Gradient of Kdiag(X) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:")
        Kern_check_dKdiag_dtheta(kern, X=X).checkgrad(verbose=True)
        pass_checks = False
        return False

    if verbose:
        print("Checking gradients of K(X, X) wrt X.")
    result = Kern_check_dK_dX(kern, X=X, X2=None).checkgrad(verbose=verbose)
    if result and verbose:
        print("Check passed.")
    if not result:
        print("Gradient of K(X, X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:")
        Kern_check_dK_dX(kern, X=X, X2=None).checkgrad(verbose=True)
        pass_checks = False
        return False

    if verbose:
        print("Checking gradients of K(X, X2) wrt X.")
    result = Kern_check_dK_dX(kern, X=X, X2=X2).checkgrad(verbose=verbose)
    if result and verbose:
        print("Check passed.")
    if not result:
        print("Gradient of K(X, X2) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:")
        Kern_check_dK_dX(kern, X=X, X2=X2).checkgrad(verbose=True)
        pass_checks = False
        return False

    if verbose:
        print("Checking gradients of Kdiag(X) wrt X.")
    result = Kern_check_dKdiag_dX(kern, X=X).checkgrad(verbose=verbose)
    if result and verbose:
        print("Check passed.")
    if not result:
        print("Gradient of Kdiag(X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:")
        Kern_check_dKdiag_dX(kern, X=X).checkgrad(verbose=True)
        pass_checks = False
        return False

    return pass_checks
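Aside (not part of the commit): a minimal sketch of driving the rewritten checker, assuming kern_test is importable from GPy.kern.kern and the rbf constructor above:

    import numpy as np
    import GPy
    from GPy.kern.kern import kern_test

    k = GPy.kern.rbf(input_dim=2)
    X = np.random.randn(10, 2)
    X2 = np.random.randn(20, 2)
    assert kern_test(k, X=X, X2=X2, verbose=True)  # one printed line per check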
GPy/kern/parts/Matern32.py

@@ -98,9 +98,13 @@ class Matern32(Kernpart):

    def dK_dX(self, dL_dK, X, X2, target):
        """derivative of the covariance matrix with respect to X."""
        if X2 is None: X2 = X
        dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1))[:, :, None]
        ddist_dX = (X[:, None, :] - X2[None, :, :]) / self.lengthscale ** 2 / np.where(dist != 0., dist, np.inf)
        if X2 is None:
            dist = np.sqrt(np.sum(np.square((X[:, None, :] - X[None, :, :]) / self.lengthscale), -1))[:, :, None]
            ddist_dX = 2*(X[:, None, :] - X[None, :, :]) / self.lengthscale ** 2 / np.where(dist != 0., dist, np.inf)
        else:
            dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1))[:, :, None]
            ddist_dX = (X[:, None, :] - X2[None, :, :]) / self.lengthscale ** 2 / np.where(dist != 0., dist, np.inf)
        dK_dX = -np.transpose(3 * self.variance * dist * np.exp(-np.sqrt(3) * dist) * ddist_dX, (1, 0, 2))
        target += np.sum(dK_dX * dL_dK.T[:, :, None], 0)
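Aside (not part of the commit): a standalone finite-difference check of the new factor of 2 in the X2 is None branch, in plain NumPy:

    import numpy as np

    def k_mat32(X, X2, variance=1., lengthscale=1.):
        """Matern 3/2 covariance, in the same form as this file."""
        r = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / lengthscale), -1))
        return variance * (1. + np.sqrt(3.) * r) * np.exp(-np.sqrt(3.) * r)

    np.random.seed(0)
    X = np.random.randn(5, 2)
    E = np.zeros_like(X); E[0, 0] = 1e-6
    # Perturb X in both arguments, as when X2 is None ...
    g_both = (k_mat32(X + E, X + E).sum() - k_mat32(X - E, X - E).sum()) / 2e-6
    # ... and in the first argument only, as when X2 is given.
    g_one = (k_mat32(X + E, X).sum() - k_mat32(X - E, X).sum()) / 2e-6
    assert np.isclose(g_both, 2 * g_one, rtol=1e-4)  # the doubled gradient above

The same argument applies verbatim to the Matern52, Gibbs, linear, MLP, polynomial, rational quadratic and RBF parts below.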
GPy/kern/parts/Matern52.py

@@ -98,9 +98,12 @@ class Matern52(Kernpart):

    def dK_dX(self,dL_dK,X,X2,target):
        """derivative of the covariance matrix with respect to X."""
        if X2 is None: X2 = X
        dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1))[:,:,None]
        ddist_dX = (X[:,None,:]-X2[None,:,:])/self.lengthscale**2/np.where(dist!=0.,dist,np.inf)
        if X2 is None:
            dist = np.sqrt(np.sum(np.square((X[:,None,:]-X[None,:,:])/self.lengthscale),-1))[:,:,None]
            ddist_dX = 2*(X[:,None,:]-X[None,:,:])/self.lengthscale**2/np.where(dist!=0.,dist,np.inf)
        else:
            dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1))[:,:,None]
            ddist_dX = (X[:,None,:]-X2[None,:,:])/self.lengthscale**2/np.where(dist!=0.,dist,np.inf)
        dK_dX = - np.transpose(self.variance*5./3*dist*(1+np.sqrt(5)*dist)*np.exp(-np.sqrt(5)*dist)*ddist_dX,(1,0,2))
        target += np.sum(dK_dX*dL_dK.T[:,:,None],0)
GPy/kern/parts/__init__.py

@@ -5,6 +5,8 @@ import exponential
import finite_dimensional
import fixed
import gibbs
#import hetero #hetero.py is not commited: omitting for now. JH.
import hierarchical
import independent_outputs
import linear
import Matern32

@@ -19,8 +21,7 @@ import prod
import rational_quadratic
import rbfcos
import rbf
import rbf_inv
import spline
import symmetric
import white
import hierarchical
import rbf_inv
GPy/kern/parts/coregionalize.py

@@ -9,17 +9,18 @@ from scipy import weave

class Coregionalize(Kernpart):
    """
    Kernel for intrinsic/linear coregionalization models
    Covariance function for intrinsic/linear coregionalization models

    This kernel has the form
    This covariance has the form
    .. math::
       \mathbf{B} = \mathbf{W}\mathbf{W}^\top + \kappa \mathbf{I}

    An intrinsic/linear coregionalization kernel of the form
    An intrinsic/linear coregionalization covariance function of the form
    .. math::
       k_2(x, y)=\mathbf{B} k(x, y)

    it is obtainded as the tensor product between a kernel k(x,y) and B.
    it is obtained as the tensor product between a covariance function
    k(x,y) and B.

    :param num_outputs: number of outputs to coregionalize
    :type num_outputs: int
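Aside (not part of the commit): a small numeric illustration of the coregionalization matrix B defined above; num_outputs and rank are arbitrary choices:

    import numpy as np

    num_outputs, rank = 3, 1
    W = np.random.randn(num_outputs, rank)
    kappa = 0.5 * np.ones(num_outputs)
    B = W.dot(W.T) + np.diag(kappa)  # B = W W^T + diag(kappa)
    assert np.all(np.linalg.eigvalsh(B) > 0)  # positive definite for kappa > 0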
GPy/kern/parts/gibbs.py

@@ -9,7 +9,7 @@ import GPy

class Gibbs(Kernpart):
    """
    Gibbs and MacKay non-stationary covariance function.
    Gibbs non-stationary covariance function.

    .. math::

@@ -25,7 +25,10 @@ class Gibbs(Kernpart):
    with input location. This leads to an additional term in front of
    the kernel.

    The parameters are :math:`\sigma^2`, the process variance, and the parameters of l(x) which is a function that can be specified by the user, by default an multi-layer peceptron is used is used.
    The parameters are :math:`\sigma^2`, the process variance, and
    the parameters of l(x) which is a function that can be
    specified by the user, by default a multi-layer perceptron is
    used.

    :param input_dim: the number of input dimensions
    :type input_dim: int

@@ -37,6 +40,15 @@ class Gibbs(Kernpart):
    :type ARD: Boolean
    :rtype: Kernpart object

    See Mark Gibbs's thesis for more details: Gibbs,
    M. N. (1997). Bayesian Gaussian Processes for Regression and
    Classification. PhD thesis, Department of Physics, University of
    Cambridge. Or also see page 93 of Gaussian Processes for Machine
    Learning by Rasmussen and Williams. Although note that we do not
    constrain the lengthscale to be positive by default. This allows
    anticorrelation to occur. The positive constraint can be included
    by the user manually.

    """

    def __init__(self, input_dim, variance=1., mapping=None, ARD=False):
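Aside (not part of the commit): a hedged construction sketch for this part, assuming the gibbs constructor from GPy/kern/constructors.py earlier in this diff; the default l(x) is the MLP mapping mentioned in the docstring:

    import GPy

    k = GPy.kern.gibbs(input_dim=1, variance=1.)  # default MLP mapping for l(x)
    print(k)  # process variance plus the mapping's parameters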
@@ -89,12 +101,18 @@ class Gibbs(Kernpart):
        """Derivative of the covariance matrix with respect to X."""
        # First account for gradients arising from presence of X in exponent.
        self._K_computations(X, X2)
        _K_dist = X[:, None, :] - X2[None, :, :]
        if X2 is None:
            _K_dist = 2*(X[:, None, :] - X[None, :, :])
        else:
            _K_dist = X[:, None, :] - X2[None, :, :] # don't cache this in _K_computations
        dK_dX = (-2.*self.variance)*np.transpose((self._K_dvar/self._w2)[:, :, None]*_K_dist, (1, 0, 2))
        target += np.sum(dK_dX*dL_dK.T[:, :, None], 0)
        # Now account for gradients arising from presence of X in lengthscale.
        self._dK_computations(dL_dK)
        target += self.mapping.df_dX(self._dL_dl[:, None], X)
        if X2 is None:
            target += 2.*self.mapping.df_dX(self._dL_dl[:, None], X)
        else:
            target += self.mapping.df_dX(self._dL_dl[:, None], X)

    def dKdiag_dX(self, dL_dKdiag, X, target):
        """Gradient of diagonal of covariance with respect to X."""

@@ -102,7 +120,8 @@ class Gibbs(Kernpart):

    def dKdiag_dtheta(self, dL_dKdiag, X, target):
        """Gradient of diagonal of covariance with respect to parameters."""
        pass
        target[0] += np.sum(dL_dKdiag)


    def _K_computations(self, X, X2=None):
GPy/kern/parts/hetero.py (new file)

@@ -0,0 +1,101 @@
# Copyright (c) 2013, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)

from IPython.core.debugger import Tracer; debug_here=Tracer()
from kernpart import Kernpart
import numpy as np
from ...util.linalg import tdot
from ...core.mapping import Mapping
import GPy

class Hetero(Kernpart):
    """
    TODO: Need to constrain the function outputs positive (still thinking of best way of doing this!!! Yes, intend to use transformations, but what's the *best* way). Currently just squaring output.

    Heteroscedastic noise which depends on input location. See, for example, the paper by Goldberg et al. cited below.

    .. math::

       k(x_i, x_j) = \delta_{i,j} \sigma^2(x_i)

    where :math:`\sigma^2(x)` is a function giving the variance as a function of input space and :math:`\delta_{i,j}` is the Kronecker delta function.

    The parameters are the parameters of \sigma^2(x) which is a
    function that can be specified by the user, by default a
    multi-layer perceptron is used.

    :param input_dim: the number of input dimensions
    :type input_dim: int
    :param mapping: the mapping that gives the lengthscale across the input space (by default GPy.mappings.MLP is used with 20 hidden nodes).
    :type mapping: GPy.core.Mapping
    :rtype: Kernpart object

    See this paper:

    Goldberg, P. W., Williams, C. K. I. and Bishop,
    C. M. (1998) Regression with Input-dependent Noise: a Gaussian
    Process Treatment. In Advances in Neural Information Processing
    Systems, Volume 10, pp. 493-499. MIT Press

    for a Gaussian process treatment of this problem.

    """

    def __init__(self, input_dim, mapping=None, transform=None):
        self.input_dim = input_dim
        if not mapping:
            mapping = GPy.mappings.MLP(output_dim=1, hidden_dim=20, input_dim=input_dim)
        if not transform:
            transform = GPy.core.transformations.logexp()

        self.transform = transform
        self.mapping = mapping
        self.name='hetero'
        self.num_params=self.mapping.num_params
        self._set_params(self.mapping._get_params())

    def _get_params(self):
        return self.mapping._get_params()

    def _set_params(self, x):
        assert x.size == (self.num_params)
        self.mapping._set_params(x)

    def _get_param_names(self):
        return self.mapping._get_param_names()

    def K(self, X, X2, target):
        """Return covariance between X and X2."""
        if X2==None or X2 is X:
            target[np.diag_indices_from(target)] += self._Kdiag(X)

    def Kdiag(self, X, target):
        """Compute the diagonal of the covariance matrix for X."""
        target += self._Kdiag(X)

    def _Kdiag(self, X):
        """Helper function for computing the diagonal elements of the covariance."""
        return self.mapping.f(X).flatten()**2

    def dK_dtheta(self, dL_dK, X, X2, target):
        """Derivative of the covariance with respect to the parameters."""
        if X2==None or X2 is X:
            dL_dKdiag = dL_dK.flat[::dL_dK.shape[0]+1]
            self.dKdiag_dtheta(dL_dKdiag, X, target)

    def dKdiag_dtheta(self, dL_dKdiag, X, target):
        """Gradient of diagonal of covariance with respect to parameters."""
        target += 2.*self.mapping.df_dtheta(dL_dKdiag[:, None], X)*self.mapping.f(X)

    def dK_dX(self, dL_dK, X, X2, target):
        """Derivative of the covariance matrix with respect to X."""
        if X2==None or X2 is X:
            dL_dKdiag = dL_dK.flat[::dL_dK.shape[0]+1]
            self.dKdiag_dX(dL_dKdiag, X, target)

    def dKdiag_dX(self, dL_dKdiag, X, target):
        """Gradient of diagonal of covariance with respect to X."""
        target += 2.*self.mapping.df_dX(dL_dKdiag[:, None], X)*self.mapping.f(X)
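Aside (not part of the commit): a hedged usage sketch for the new part, assuming the hetero constructor added to GPy/kern/constructors.py above:

    import numpy as np
    import GPy

    k = GPy.kern.hetero(input_dim=1)
    X = np.linspace(0., 1., 5)[:, None]
    K = k.K(X)
    # Only the diagonal is non-zero: K_ii = sigma^2(x_i), the squared
    # mapping output, per the Kronecker-delta definition in the docstring.
    assert np.allclose(K, np.diag(np.diag(K)))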
GPy/kern/parts/kernpart.py

@@ -59,6 +59,45 @@ class Kernpart(object):
    def dK_dX(self, dL_dK, X, X2, target):
        raise NotImplementedError



class Kernpart_stationary(Kernpart):
    def __init__(self, input_dim, lengthscale=None, ARD=False):
        self.input_dim = input_dim
        self.ARD = ARD
        if not ARD:
            self.num_params = 2
            if lengthscale is not None:
                self.lengthscale = np.asarray(lengthscale)
                assert self.lengthscale.size == 1, "Only one lengthscale needed for non-ARD kernel"
            else:
                self.lengthscale = np.ones(1)
        else:
            self.num_params = self.input_dim + 1
            if lengthscale is not None:
                self.lengthscale = np.asarray(lengthscale)
                assert self.lengthscale.size == self.input_dim, "bad number of lengthscales"
            else:
                self.lengthscale = np.ones(self.input_dim)

        # initialize cache
        self._Z, self._mu, self._S = np.empty(shape=(3, 1))
        self._X, self._X2, self._params = np.empty(shape=(3, 1))

    def _set_params(self, x):
        self.lengthscale = x
        self.lengthscale2 = np.square(self.lengthscale)
        # reset cached results
        self._X, self._X2, self._params = np.empty(shape=(3, 1))
        self._Z, self._mu, self._S = np.empty(shape=(3, 1)) # cached versions of Z,mu,S


    def dKdiag_dtheta(self, dL_dKdiag, X, target):
        # For stationary covariances, derivative of diagonal elements
        # wrt lengthscale is 0.
        target[0] += np.sum(dL_dKdiag)


class Kernpart_inner(Kernpart):
    def __init__(self,input_dim):
        """

@@ -74,3 +113,5 @@ class Kernpart_inner(Kernpart):
        # initialize cache
        self._Z, self._mu, self._S = np.empty(shape=(3, 1))
        self._X, self._X2, self._params = np.empty(shape=(3, 1))
GPy/kern/parts/linear.py

@@ -99,7 +99,10 @@ class Linear(Kernpart):
        target += tmp.sum()

    def dK_dX(self, dL_dK, X, X2, target):
        target += (((X2[None,:, :] * self.variances)) * dL_dK[:, :, None]).sum(1)
        if X2 is None:
            target += 2*(((X[None,:, :] * self.variances)) * dL_dK[:, :, None]).sum(1)
        else:
            target += (((X2[None,:, :] * self.variances)) * dL_dK[:, :, None]).sum(1)

    def dKdiag_dX(self,dL_dKdiag,X,target):
        target += 2.*self.variances*dL_dKdiag[:,None]*X
GPy/kern/parts/mlp.py

@@ -110,9 +110,13 @@ class MLP(Kernpart):
        arg = self._K_asin_arg
        numer = self._K_numer
        denom = self._K_denom
        vec2 = (X2*X2).sum(1)*self.weight_variance + self.bias_variance + 1.
        denom3 = denom*denom*denom
        target += four_over_tau*self.weight_variance*self.variance*((X2[None, :, :]/denom[:, :, None] - vec2[None, :, None]*X[:, None, :]*(numer/denom3)[:, :, None])*(dL_dK/np.sqrt(1-arg*arg))[:, :, None]).sum(1)
        if X2 is not None:
            vec2 = (X2*X2).sum(1)*self.weight_variance+self.bias_variance + 1.
            target += four_over_tau*self.weight_variance*self.variance*((X2[None, :, :]/denom[:, :, None] - vec2[None, :, None]*X[:, None, :]*(numer/denom3)[:, :, None])*(dL_dK/np.sqrt(1-arg*arg))[:, :, None]).sum(1)
        else:
            vec = (X*X).sum(1)*self.weight_variance+self.bias_variance + 1.
            target += 2*four_over_tau*self.weight_variance*self.variance*((X[None, :, :]/denom[:, :, None] - vec[None, :, None]*X[:, None, :]*(numer/denom3)[:, :, None])*(dL_dK/np.sqrt(1-arg*arg))[:, :, None]).sum(1)

    def dKdiag_dX(self, dL_dKdiag, X, target):
        """Gradient of diagonal of covariance with respect to X"""
GPy/kern/parts/poly.py

@@ -103,7 +103,10 @@ class POLY(Kernpart):
        """Derivative of the covariance matrix with respect to X"""
        self._K_computations(X, X2)
        arg = self._K_poly_arg
        target += self.weight_variance*self.degree*self.variance*(((X2[None,:, :])) *(arg**(self.degree-1))[:, :, None]*dL_dK[:, :, None]).sum(1)
        if X2 is None:
            target += 2*self.weight_variance*self.degree*self.variance*(((X[None,:, :])) *(arg**(self.degree-1))[:, :, None]*dL_dK[:, :, None]).sum(1)
        else:
            target += self.weight_variance*self.degree*self.variance*(((X2[None,:, :])) *(arg**(self.degree-1))[:, :, None]*dL_dK[:, :, None]).sum(1)

    def dKdiag_dX(self, dL_dKdiag, X, target):
        """Gradient of diagonal of covariance with respect to X"""
GPy/kern/parts/rational_quadratic.py

@@ -70,10 +70,12 @@ class RationalQuadratic(Kernpart):

    def dK_dX(self,dL_dK,X,X2,target):
        """derivative of the covariance matrix with respect to X."""
        if X2 is None: X2 = X
        dist2 = np.square((X-X2.T)/self.lengthscale)

        dX = -self.variance*self.power * (X-X2.T)/self.lengthscale**2 * (1 + dist2/2./self.lengthscale)**(-self.power-1)
        if X2 is None:
            dist2 = np.square((X-X.T)/self.lengthscale)
            dX = -2.*self.variance*self.power * (X-X.T)/self.lengthscale**2 * (1 + dist2/2./self.lengthscale)**(-self.power-1)
        else:
            dist2 = np.square((X-X2.T)/self.lengthscale)
            dX = -self.variance*self.power * (X-X2.T)/self.lengthscale**2 * (1 + dist2/2./self.lengthscale)**(-self.power-1)
        target += np.sum(dL_dK*dX,1)[:,np.newaxis]

    def dKdiag_dX(self,dL_dKdiag,X,target):
GPy/kern/parts/rbf.py

@@ -138,7 +138,10 @@ class RBF(Kernpart):

    def dK_dX(self, dL_dK, X, X2, target):
        self._K_computations(X, X2)
        _K_dist = X[:, None, :] - X2[None, :, :] # don't cache this in _K_computations because it is high memory. If this function is being called, chances are we're not in the high memory arena.
        if X2 is None:
            _K_dist = 2*(X[:, None, :] - X[None, :, :])
        else:
            _K_dist = X[:, None, :] - X2[None, :, :] # don't cache this in _K_computations because it is high memory. If this function is being called, chances are we're not in the high memory arena.
        dK_dX = (-self.variance / self.lengthscale2) * np.transpose(self._K_dvar[:, :, np.newaxis] * _K_dist, (1, 0, 2))
        target += np.sum(dK_dX * dL_dK.T[:, :, None], 0)
GPy/kern/parts/rbf_inv.py

@@ -133,7 +133,10 @@ class RBFInv(RBF):

    def dK_dX(self, dL_dK, X, X2, target):
        self._K_computations(X, X2)
        _K_dist = X[:, None, :] - X2[None, :, :] # don't cache this in _K_computations because it is high memory. If this function is being called, chances are we're not in the high memory arena.
        if X2 is None:
            _K_dist = 2*(X[:, None, :] - X[None, :, :])
        else:
            _K_dist = X[:, None, :] - X2[None, :, :] # don't cache this in _K_computations because it is high memory. If this function is being called, chances are we're not in the high memory arena.
        dK_dX = (-self.variance * self.inv_lengthscale2) * np.transpose(self._K_dvar[:, :, np.newaxis] * _K_dist, (1, 0, 2))
        target += np.sum(dK_dX * dL_dK.T[:, :, None], 0)