Mirror of https://github.com/SheffieldML/GPy.git (synced 2026-05-11 04:52:37 +02:00)
Changed kern.py so that X2 is correctly passed as None to the kern parts for dK_dX. Modified several kern parts so that dK_dX is correctly computed (factors of 2 missing). Removed spurious factors of 2 from gplvm, bcgplvm, sparse_gp and fitc code.
This commit is contained in:
parent
2e6cac0d34
commit
00d335444d
20 changed files with 259 additions and 68 deletions
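Note on the "missing factors of 2": when a covariance is evaluated as K(X, X), X enters through both arguments, so by the chain rule the gradient of any sum of entries of K with respect to X is twice what you get by differentiating with respect to the first argument only. A minimal NumPy sketch (independent of GPy's API; the helper names below are made up purely for illustration) that checks this numerically:

import numpy as np

def rbf(X, X2, variance=1., lengthscale=1.):
    # Squared-exponential covariance between the rows of X and X2.
    r2 = np.sum(np.square(X[:, None, :] - X2[None, :, :]), -1)
    return variance * np.exp(-0.5 * r2 / lengthscale**2)

def dsumK_dX_first_arg(X, X2, variance=1., lengthscale=1.):
    # d/dX of sum_ij K(X, X2), treating the second argument as fixed.
    diff = X[:, None, :] - X2[None, :, :]
    K = rbf(X, X2, variance, lengthscale)
    return np.sum(-diff / lengthscale**2 * K[:, :, None], axis=1)

rng = np.random.default_rng(0)
X = rng.standard_normal((5, 3))
i, q, eps = 2, 1, 1e-6

# Numerical derivative of sum_ij K(X, X) with respect to X[i, q].
Xp, Xm = X.copy(), X.copy()
Xp[i, q] += eps
Xm[i, q] -= eps
numerical = (rbf(Xp, Xp).sum() - rbf(Xm, Xm).sum()) / (2 * eps)

# The one-argument gradient must be doubled to match, because X appears in both slots.
print(numerical, 2 * dsumK_dX_first_arg(X, X)[i, q])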
@@ -174,7 +174,7 @@ class FITC(SparseGP):
     def dL_dZ(self):
         dL_dZ = self.kern.dK_dX(self._dL_dpsi1.T,self.Z,self.X)
-        dL_dZ += 2. * self.kern.dK_dX(self._dL_dKmm,X=self.Z)
+        dL_dZ += self.kern.dK_dX(self._dL_dKmm,X=self.Z)
         dL_dZ += self._dpsi1_dX
         dL_dZ += self._dKmm_dX
         return dL_dZ

@@ -1,4 +1,4 @@
-# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
+# Copyright (c) 2012, 2013, GPy authors (see AUTHORS.txt).
 # Licensed under the BSD 3-clause license (see LICENSE.txt)

@@ -456,8 +456,8 @@ class Model(Parameterized):
            gradient = self.objective_function_gradients(x)
            numerical_gradient = (f1 - f2) / (2 * dx)
-            global_ratio = (f1 - f2) / (2 * np.dot(dx, gradient))
+            global_ratio = (f1 - f2) / (2 * np.dot(dx, np.where(gradient==0, 1e-32, gradient)))
            return (np.abs(1. - global_ratio) < tolerance) or (np.abs(gradient - numerical_gradient).mean() - 1) < tolerance
        else:
            # check the gradient of each parameter individually, and do some pretty printing

@@ -496,7 +496,7 @@ class Model(Parameterized):
                gradient = self.objective_function_gradients(x)[i]
                numerical_gradient = (f1 - f2) / (2 * step)
-                ratio = (f1 - f2) / (2 * step * gradient)
+                ratio = (f1 - f2) / (2 * step * np.where(gradient==0, 1e-312, gradient))
                difference = np.abs((f1 - f2) / 2 / step - gradient)
                if (np.abs(1. - ratio) < tolerance) or np.abs(difference) < tolerance:

@@ -240,7 +240,7 @@ class SparseGP(GPBase):
        """
        The derivative of the bound wrt the inducing inputs Z
        """
-        dL_dZ = 2.*self.kern.dK_dX(self.dL_dKmm, self.Z) # factor of two becase of vertical and horizontal 'stripes' in dKmm_dZ
+        dL_dZ = self.kern.dK_dX(self.dL_dKmm, self.Z)
        if self.has_uncertain_inputs:
            dL_dZ += self.kern.dpsi1_dZ(self.dL_dpsi1, self.Z, self.X, self.X_variance)
            dL_dZ += self.kern.dpsi2_dZ(self.dL_dpsi2, self.Z, self.X, self.X_variance)

@@ -18,9 +18,11 @@ class transformation(object):
     def gradfactor(self, f):
         """ df_dx evaluated at self.f(x)=f"""
         raise NotImplementedError

     def initialize(self, f):
         """ produce a sensible initial value for f(x)"""
         raise NotImplementedError

     def __str__(self):
         raise NotImplementedError

@@ -42,15 +44,13 @@ class logexp(transformation):
 class negative_logexp(transformation):
     domain = NEGATIVE
     def f(self, x):
-        return -logexp.f(x) #np.log(1. + np.exp(x))
+        return -logexp.f(x)
     def finv(self, f):
-        return logexp.finv(-f) #np.log(np.exp(-f) - 1.)
+        return logexp.finv(-f)
     def gradfactor(self, f):
         return -logexp.gradfactor(-f)
-        #ef = np.exp(-f)
-        #return -(ef - 1.) / ef
     def initialize(self, f):
-        return -logexp.initialize(f) #np.abs(f)
+        return -logexp.initialize(f)
     def __str__(self):
         return '(-ve)'

@@ -82,7 +82,6 @@ class logexp_clipped(logexp):
         return '(+ve_c)'

 class exponent(transformation):
     # TODO: can't allow this to go to zero, need to set a lower bound. Similar with negative exponent below. See old MATLAB code.
     domain = POSITIVE
     def f(self, x):
         return np.where(x<lim_val, np.where(x>-lim_val, np.exp(x), np.exp(-lim_val)), np.exp(lim_val))

@@ -5,7 +5,6 @@ import numpy as np
 from kern import kern
 import parts

 def rbf_inv(input_dim,variance=1., inv_lengthscale=None,ARD=False):
     """
     Construct an RBF kernel

@@ -103,6 +102,12 @@ def gibbs(input_dim,variance=1., mapping=None):
     part = parts.gibbs.Gibbs(input_dim,variance,mapping)
     return kern(input_dim, [part])

+def hetero(input_dim, mapping=None, transform=None):
+    """
+    """
+    part = parts.hetero.Hetero(input_dim,mapping,transform)
+    return kern(input_dim, [part])
+
 def poly(input_dim,variance=1., weight_variance=None,bias_variance=1.,degree=2, ARD=False):
     """
     Construct a polynomial kernel

@@ -135,6 +140,7 @@ def white(input_dim,variance=1.):
     part = parts.white.White(input_dim,variance)
     return kern(input_dim, [part])


 def exponential(input_dim,variance=1., lengthscale=None, ARD=False):
     """
     Construct an exponential kernel

GPy/kern/kern.py (101 changed lines)

@@ -12,7 +12,9 @@ from matplotlib.transforms import offset_copy
 class kern(Parameterized):
     def __init__(self, input_dim, parts=[], input_slices=None):
         """
-        This is the main kernel class for GPy. It handles multiple (additive) kernel functions, and keeps track of variaous things like which parameters live where.
+        This is the main kernel class for GPy. It handles multiple
+        (additive) kernel functions, and keeps track of various things
+        like which parameters live where.

         The technical code for kernels is divided into _parts_ (see
         e.g. rbf.py). This object contains a list of parts, which are

@@ -33,6 +35,11 @@ class kern(Parameterized):
         self.input_dim = input_dim

+        part_names = [k.name for k in self.parts]
+        self.name=''
+        for name in part_names:
+            self.name += name + '+'
+        self.name = self.name[:-1]
         # deal with input_slices
         if input_slices is None:
             self.input_slices = [slice(None) for p in self.parts]

@@ -333,10 +340,8 @@ class kern(Parameterized):
        :type X: np.ndarray (num_samples x input_dim)
        :param X2: Observed data inputs (optional, defaults to X)
        :type X2: np.ndarray (num_inducing x input_dim)"""
-        if X2 is None:
-            X2 = X
         target = np.zeros_like(X)
-        [p.dK_dX(dL_dK, X[:, i_s], X2[:, i_s], target[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)]
+        if X2 is None:
+            [p.dK_dX(dL_dK, X[:, i_s], None, target[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)]
+        else:
+            [p.dK_dX(dL_dK, X[:, i_s], X2[:, i_s], target[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)]

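For context, the convention this change establishes is that X2=None means "the second argument is the same array as X", so each part applies the symmetric (doubled) gradient itself rather than the wrapper silently substituting X2 = X. A stripped-down sketch of the dispatch (not GPy's actual code; input slicing and the additive bookkeeping are omitted):

import numpy as np

def dK_dX_dispatch(parts, dL_dK, X, X2=None):
    # Accumulate dL/dX over additive kernel parts, preserving the X2-is-None signal.
    target = np.zeros_like(X)
    for part in parts:
        if X2 is None:
            part.dK_dX(dL_dK, X, None, target)  # part doubles its one-argument gradient
        else:
            part.dK_dX(dL_dK, X, X2, target)    # cross-covariance K(X, X2): no doubling
    return target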
@@ -653,17 +658,85 @@ def kern_test(kern, X=None, X2=None, verbose=False):
    :param X2: X2 input values to test the covariance function.
    :type X2: ndarray
    """
+    pass_checks = True
    if X==None:
        X = np.random.randn(10, kern.input_dim)
    if X2==None:
        X2 = np.random.randn(20, kern.input_dim)
-    result = [Kern_check_model(kern, X=X).is_positive_definite(),
-              Kern_check_dK_dtheta(kern, X=X, X2=X2).checkgrad(verbose=verbose),
-              Kern_check_dK_dtheta(kern, X=X, X2=None).checkgrad(verbose=verbose),
-              Kern_check_dKdiag_dtheta(kern, X=X).checkgrad(verbose=verbose),
-              Kern_check_dK_dX(kern, X=X, X2=X2).checkgrad(verbose=verbose),
-              Kern_check_dKdiag_dX(kern, X=X).checkgrad(verbose=verbose)]
-    # Need to check
-    #Kern_check_dK_dX(kern, X, X2=None).checkgrad(verbose=verbose)]
-    # but currently I think these aren't implemented.
-    return np.all(result)
+    if verbose:
+        print("Checking covariance function is positive definite.")
+    result = Kern_check_model(kern, X=X).is_positive_definite()
+    if result and verbose:
+        print("Check passed.")
+    if not result:
+        print("Positive definite check failed for " + kern.name + " covariance function.")
+        pass_checks = False
+        return False
+
+    if verbose:
+        print("Checking gradients of K(X, X) wrt theta.")
+    result = Kern_check_dK_dtheta(kern, X=X, X2=None).checkgrad(verbose=verbose)
+    if result and verbose:
+        print("Check passed.")
+    if not result:
+        print("Gradient of K(X, X) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:")
+        Kern_check_dK_dtheta(kern, X=X, X2=None).checkgrad(verbose=True)
+        pass_checks = False
+        return False
+
+    if verbose:
+        print("Checking gradients of K(X, X2) wrt theta.")
+    result = Kern_check_dK_dtheta(kern, X=X, X2=X2).checkgrad(verbose=verbose)
+    if result and verbose:
+        print("Check passed.")
+    if not result:
+        print("Gradient of K(X, X) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:")
+        Kern_check_dK_dtheta(kern, X=X, X2=X2).checkgrad(verbose=True)
+        pass_checks = False
+        return False
+
+    if verbose:
+        print("Checking gradients of Kdiag(X) wrt theta.")
+    result = Kern_check_dKdiag_dtheta(kern, X=X).checkgrad(verbose=verbose)
+    if result and verbose:
+        print("Check passed.")
+    if not result:
+        print("Gradient of Kdiag(X) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:")
+        Kern_check_dKdiag_dtheta(kern, X=X).checkgrad(verbose=True)
+        pass_checks = False
+        return False
+
+    if verbose:
+        print("Checking gradients of K(X, X) wrt X.")
+    result = Kern_check_dK_dX(kern, X=X, X2=None).checkgrad(verbose=verbose)
+    if result and verbose:
+        print("Check passed.")
+    if not result:
+        print("Gradient of K(X, X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:")
+        Kern_check_dK_dX(kern, X=X, X2=None).checkgrad(verbose=True)
+        pass_checks = False
+        return False
+
+    if verbose:
+        print("Checking gradients of K(X, X2) wrt X.")
+    result = Kern_check_dK_dX(kern, X=X, X2=X2).checkgrad(verbose=verbose)
+    if result and verbose:
+        print("Check passed.")
+    if not result:
+        print("Gradient of K(X, X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:")
+        Kern_check_dK_dX(kern, X=X, X2=X2).checkgrad(verbose=True)
+        pass_checks = False
+        return False
+
+    if verbose:
+        print("Checking gradients of Kdiag(X) wrt X.")
+    result = Kern_check_dKdiag_dX(kern, X=X).checkgrad(verbose=verbose)
+    if result and verbose:
+        print("Check passed.")
+    if not result:
+        print("Gradient of Kdiag(X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:")
+        Kern_check_dKdiag_dX(kern, X=X).checkgrad(verbose=True)
+        pass_checks = False
+        return False
+
+    return pass_checks

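A possible usage sketch of the reworked helper, mirroring the calls added to kernel_tests.py further down in this diff:

import numpy as np
import GPy

kern = GPy.kern.rbf(5)
assert GPy.kern.kern_test(kern, verbose=True)   # prints each check as it runs

# Test inputs can also be supplied explicitly; otherwise they are drawn at random.
X = np.random.randn(10, 5)
X2 = np.random.randn(20, 5)
assert GPy.kern.kern_test(kern, X=X, X2=X2)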
@@ -98,9 +98,13 @@ class Matern32(Kernpart):
     def dK_dX(self, dL_dK, X, X2, target):
         """derivative of the covariance matrix with respect to X."""
-        if X2 is None: X2 = X
-        dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1))[:, :, None]
-        ddist_dX = (X[:, None, :] - X2[None, :, :]) / self.lengthscale ** 2 / np.where(dist != 0., dist, np.inf)
+        if X2 is None:
+            dist = np.sqrt(np.sum(np.square((X[:, None, :] - X[None, :, :]) / self.lengthscale), -1))[:, :, None]
+            ddist_dX = 2*(X[:, None, :] - X[None, :, :]) / self.lengthscale ** 2 / np.where(dist != 0., dist, np.inf)
+        else:
+            dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1))[:, :, None]
+            ddist_dX = (X[:, None, :] - X2[None, :, :]) / self.lengthscale ** 2 / np.where(dist != 0., dist, np.inf)
         dK_dX = -np.transpose(3 * self.variance * dist * np.exp(-np.sqrt(3) * dist) * ddist_dX, (1, 0, 2))
         target += np.sum(dK_dX * dL_dK.T[:, :, None], 0)

@@ -98,9 +98,12 @@ class Matern52(Kernpart):
     def dK_dX(self,dL_dK,X,X2,target):
         """derivative of the covariance matrix with respect to X."""
-        if X2 is None: X2 = X
-        dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1))[:,:,None]
-        ddist_dX = (X[:,None,:]-X2[None,:,:])/self.lengthscale**2/np.where(dist!=0.,dist,np.inf)
+        if X2 is None:
+            dist = np.sqrt(np.sum(np.square((X[:,None,:]-X[None,:,:])/self.lengthscale),-1))[:,:,None]
+            ddist_dX = 2*(X[:,None,:]-X[None,:,:])/self.lengthscale**2/np.where(dist!=0.,dist,np.inf)
+        else:
+            dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1))[:,:,None]
+            ddist_dX = (X[:,None,:]-X2[None,:,:])/self.lengthscale**2/np.where(dist!=0.,dist,np.inf)
         dK_dX = - np.transpose(self.variance*5./3*dist*(1+np.sqrt(5)*dist)*np.exp(-np.sqrt(5)*dist)*ddist_dX,(1,0,2))
         target += np.sum(dK_dX*dL_dK.T[:,:,None],0)

@@ -5,6 +5,8 @@ import exponential
 import finite_dimensional
 import fixed
 import gibbs
+import hetero
+import hierarchical
 import independent_outputs
 import linear
 import Matern32

@@ -19,8 +21,7 @@ import prod
 import rational_quadratic
 import rbfcos
 import rbf
+import rbf_inv
 import spline
 import symmetric
 import white
-import hierarchical
-import rbf_inv

@@ -9,7 +9,7 @@ import GPy
 class Gibbs(Kernpart):
     """
-    Gibbs and MacKay non-stationary covariance function.
+    Gibbs non-stationary covariance function.

     .. math::

@@ -25,7 +25,10 @@ class Gibbs(Kernpart):
     with input location. This leads to an additional term in front of
     the kernel.

-    The parameters are :math:`\sigma^2`, the process variance, and the parameters of l(x) which is a function that can be specified by the user, by default an multi-layer peceptron is used is used.
+    The parameters are :math:`\sigma^2`, the process variance, and
+    the parameters of l(x) which is a function that can be
+    specified by the user, by default an multi-layer peceptron is
+    used.

     :param input_dim: the number of input dimensions
     :type input_dim: int

@@ -37,6 +40,15 @@ class Gibbs(Kernpart):
     :type ARD: Boolean
     :rtype: Kernpart object

+    See Mark Gibbs's thesis for more details: Gibbs,
+    M. N. (1997). Bayesian Gaussian Processes for Regression and
+    Classification. PhD thesis, Department of Physics, University of
+    Cambridge. Or also see Page 93 of Gaussian Processes for Machine
+    Learning by Rasmussen and Williams. Although note that we do not
+    constrain the lengthscale to be positive by default. This allows
+    anticorrelation to occur. The positive constraint can be included
+    by the user manually.
+
     """

     def __init__(self, input_dim, variance=1., mapping=None, ARD=False):

@@ -89,12 +101,18 @@ class Gibbs(Kernpart):
         """Derivative of the covariance matrix with respect to X."""
         # First account for gradients arising from presence of X in exponent.
         self._K_computations(X, X2)
-        _K_dist = X[:, None, :] - X2[None, :, :]
+        if X2 is None:
+            _K_dist = 2*(X[:, None, :] - X[None, :, :])
+        else:
+            _K_dist = X[:, None, :] - X2[None, :, :] # don't cache this in _K_co
         dK_dX = (-2.*self.variance)*np.transpose((self._K_dvar/self._w2)[:, :, None]*_K_dist, (1, 0, 2))
         target += np.sum(dK_dX*dL_dK.T[:, :, None], 0)
         # Now account for gradients arising from presence of X in lengthscale.
         self._dK_computations(dL_dK)
-        target += self.mapping.df_dX(self._dL_dl[:, None], X)
+        if X2 is None:
+            target += 2.*self.mapping.df_dX(self._dL_dl[:, None], X)
+        else:
+            target += self.mapping.df_dX(self._dL_dl[:, None], X)

     def dKdiag_dX(self, dL_dKdiag, X, target):
         """Gradient of diagonal of covariance with respect to X."""

@@ -102,7 +120,8 @@ class Gibbs(Kernpart):

     def dKdiag_dtheta(self, dL_dKdiag, X, target):
         """Gradient of diagonal of covariance with respect to parameters."""
-        pass
+        target[0] += np.sum(dL_dKdiag)


     def _K_computations(self, X, X2=None):

@@ -59,6 +59,45 @@ class Kernpart(object):
     def dK_dX(self, dL_dK, X, X2, target):
         raise NotImplementedError

+
+class Kernpart_stationary(Kernpart):
+    def __init__(self, input_dim, lengthscale=None, ARD=False):
+        self.input_dim = input_dim
+        self.ARD = ARD
+        if not ARD:
+            self.num_params = 2
+            if lengthscale is not None:
+                self.lengthscale = np.asarray(lengthscale)
+                assert self.lengthscale.size == 1, "Only one lengthscale needed for non-ARD kernel"
+            else:
+                self.lengthscale = np.ones(1)
+        else:
+            self.num_params = self.input_dim + 1
+            if lengthscale is not None:
+                self.lengthscale = np.asarray(lengthscale)
+                assert self.lengthscale.size == self.input_dim, "bad number of lengthscales"
+            else:
+                self.lengthscale = np.ones(self.input_dim)
+
+        # initialize cache
+        self._Z, self._mu, self._S = np.empty(shape=(3, 1))
+        self._X, self._X2, self._params = np.empty(shape=(3, 1))
+
+    def _set_params(self, x):
+        self.lengthscale = x
+        self.lengthscale2 = np.square(self.lengthscale)
+        # reset cached results
+        self._X, self._X2, self._params = np.empty(shape=(3, 1))
+        self._Z, self._mu, self._S = np.empty(shape=(3, 1)) # cached versions of Z,mu,S
+
+    def dKdiag_dtheta(self, dL_dKdiag, X, target):
+        # For stationary covariances, derivative of diagonal elements
+        # wrt lengthscale is 0.
+        target[0] += np.sum(dL_dKdiag)
+
+
 class Kernpart_inner(Kernpart):
     def __init__(self,input_dim):
         """

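The new Kernpart_stationary base mainly centralises lengthscale bookkeeping (ARD vs single lengthscale) and the fact that the diagonal of a stationary covariance does not depend on the lengthscale. A hypothetical illustration of that bookkeeping (not part of the commit; it assumes the Kernpart_stationary class from the hunk above is importable):

import numpy as np

part = Kernpart_stationary(input_dim=3, lengthscale=[1., 2., 0.5], ARD=True)
print(part.num_params)    # 4, i.e. input_dim + 1
print(part.lengthscale)   # array([1. , 2. , 0.5])

part = Kernpart_stationary(input_dim=3, ARD=False)
print(part.num_params)    # 2
print(part.lengthscale)   # array([1.])

# dKdiag_dtheta routes the whole diagonal gradient into the first parameter slot
# (the variance in the parts shown here), since the diagonal is flat in the lengthscale.
grad = np.zeros(part.num_params)
part.dKdiag_dtheta(dL_dKdiag=np.ones(10), X=np.random.randn(10, 3), target=grad)
print(grad)               # [10.  0.]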
@@ -74,3 +113,5 @@ class Kernpart_inner(Kernpart):
         # initialize cache
         self._Z, self._mu, self._S = np.empty(shape=(3, 1))
         self._X, self._X2, self._params = np.empty(shape=(3, 1))
+
+

@@ -99,7 +99,10 @@ class Linear(Kernpart):
         target += tmp.sum()

     def dK_dX(self, dL_dK, X, X2, target):
-        target += (((X2[None,:, :] * self.variances)) * dL_dK[:, :, None]).sum(1)
+        if X2 is None:
+            target += 2*(((X[None,:, :] * self.variances)) * dL_dK[:, :, None]).sum(1)
+        else:
+            target += (((X2[None,:, :] * self.variances)) * dL_dK[:, :, None]).sum(1)

     def dKdiag_dX(self,dL_dKdiag,X,target):
         target += 2.*self.variances*dL_dKdiag[:,None]*X

@@ -110,9 +110,13 @@ class MLP(Kernpart):
         arg = self._K_asin_arg
         numer = self._K_numer
         denom = self._K_denom
-        vec2 = (X2*X2).sum(1)*self.weight_variance + self.bias_variance + 1.
         denom3 = denom*denom*denom
-        target += four_over_tau*self.weight_variance*self.variance*((X2[None, :, :]/denom[:, :, None] - vec2[None, :, None]*X[:, None, :]*(numer/denom3)[:, :, None])*(dL_dK/np.sqrt(1-arg*arg))[:, :, None]).sum(1)
+        if X2 is not None:
+            vec2 = (X2*X2).sum(1)*self.weight_variance+self.bias_variance + 1.
+            target += four_over_tau*self.weight_variance*self.variance*((X2[None, :, :]/denom[:, :, None] - vec2[None, :, None]*X[:, None, :]*(numer/denom3)[:, :, None])*(dL_dK/np.sqrt(1-arg*arg))[:, :, None]).sum(1)
+        else:
+            vec = (X*X).sum(1)*self.weight_variance+self.bias_variance + 1.
+            target += 2*four_over_tau*self.weight_variance*self.variance*((X[None, :, :]/denom[:, :, None] - vec[None, :, None]*X[:, None, :]*(numer/denom3)[:, :, None])*(dL_dK/np.sqrt(1-arg*arg))[:, :, None]).sum(1)

     def dKdiag_dX(self, dL_dKdiag, X, target):
         """Gradient of diagonal of covariance with respect to X"""

@@ -103,7 +103,10 @@ class POLY(Kernpart):
         """Derivative of the covariance matrix with respect to X"""
         self._K_computations(X, X2)
         arg = self._K_poly_arg
-        target += self.weight_variance*self.degree*self.variance*(((X2[None,:, :])) *(arg**(self.degree-1))[:, :, None]*dL_dK[:, :, None]).sum(1)
+        if X2 is None:
+            target += 2*self.weight_variance*self.degree*self.variance*(((X[None,:, :])) *(arg**(self.degree-1))[:, :, None]*dL_dK[:, :, None]).sum(1)
+        else:
+            target += self.weight_variance*self.degree*self.variance*(((X2[None,:, :])) *(arg**(self.degree-1))[:, :, None]*dL_dK[:, :, None]).sum(1)

     def dKdiag_dX(self, dL_dKdiag, X, target):
         """Gradient of diagonal of covariance with respect to X"""

@@ -70,10 +70,12 @@ class RationalQuadratic(Kernpart):

     def dK_dX(self,dL_dK,X,X2,target):
         """derivative of the covariance matrix with respect to X."""
-        if X2 is None: X2 = X
-        dist2 = np.square((X-X2.T)/self.lengthscale)
-
-        dX = -self.variance*self.power * (X-X2.T)/self.lengthscale**2 * (1 + dist2/2./self.lengthscale)**(-self.power-1)
+        if X2 is None:
+            dist2 = np.square((X-X.T)/self.lengthscale)
+            dX = -2.*self.variance*self.power * (X-X.T)/self.lengthscale**2 * (1 + dist2/2./self.lengthscale)**(-self.power-1)
+        else:
+            dist2 = np.square((X-X2.T)/self.lengthscale)
+            dX = -self.variance*self.power * (X-X2.T)/self.lengthscale**2 * (1 + dist2/2./self.lengthscale)**(-self.power-1)
         target += np.sum(dL_dK*dX,1)[:,np.newaxis]

     def dKdiag_dX(self,dL_dKdiag,X,target):

@@ -138,7 +138,10 @@ class RBF(Kernpart):

     def dK_dX(self, dL_dK, X, X2, target):
         self._K_computations(X, X2)
-        _K_dist = X[:, None, :] - X2[None, :, :] # don't cache this in _K_computations because it is high memory. If this function is being called, chances are we're not in the high memory arena.
+        if X2 is None:
+            _K_dist = 2*(X[:, None, :] - X[None, :, :])
+        else:
+            _K_dist = X[:, None, :] - X2[None, :, :] # don't cache this in _K_computations because it is high memory. If this function is being called, chances are we're not in the high memory arena.
         dK_dX = (-self.variance / self.lengthscale2) * np.transpose(self._K_dvar[:, :, np.newaxis] * _K_dist, (1, 0, 2))
         target += np.sum(dK_dX * dL_dK.T[:, :, None], 0)

@@ -133,7 +133,10 @@ class RBFInv(RBF):

     def dK_dX(self, dL_dK, X, X2, target):
         self._K_computations(X, X2)
-        _K_dist = X[:, None, :] - X2[None, :, :] # don't cache this in _K_computations because it is high memory. If this function is being called, chances are we're not in the high memory arena.
+        if X2 is None:
+            _K_dist = 2*(X[:, None, :] - X[None, :, :])
+        else:
+            _K_dist = X[:, None, :] - X2[None, :, :] # don't cache this in _K_computations because it is high memory. If this function is being called, chances are we're not in the high memory arena.
         dK_dX = (-self.variance * self.inv_lengthscale2) * np.transpose(self._K_dvar[:, :, np.newaxis] * _K_dist, (1, 0, 2))
         target += np.sum(dK_dX * dL_dK.T[:, :, None], 0)

@@ -44,7 +44,7 @@ class BCGPLVM(GPLVM):
         GP._set_params(self, x[self.mapping.num_params:])

     def _log_likelihood_gradients(self):
-        dL_df = 2.*self.kern.dK_dX(self.dL_dK, self.X)
+        dL_df = self.kern.dK_dX(self.dL_dK, self.X)
         dL_dtheta = self.mapping.df_dtheta(dL_df, self.likelihood.Y)
         return np.hstack((dL_dtheta.flatten(), GP._log_likelihood_gradients(self)))

@@ -61,7 +61,7 @@ class GPLVM(GP):
         GP._set_params(self, x[self.X.size:])

     def _log_likelihood_gradients(self):
-        dL_dX = 2.*self.kern.dK_dX(self.dL_dK, self.X)
+        dL_dX = self.kern.dK_dX(self.dL_dK, self.X)

         return np.hstack((dL_dX.flatten(), GP._log_likelihood_gradients(self)))

@@ -5,7 +5,7 @@ import unittest
 import numpy as np
 import GPy

+verbose = False

 class KernelTests(unittest.TestCase):
     def test_kerneltie(self):

@@ -19,25 +19,54 @@ class KernelTests(unittest.TestCase):
         self.assertTrue(m.checkgrad())

     def test_rbfkernel(self):
-        verbose = False
-        kern = GPy.kern.rbf(5)
-        self.assertTrue(GPy.kern.Kern_check_model(kern).is_positive_definite())
-        self.assertTrue(GPy.kern.Kern_check_dK_dtheta(kern).checkgrad(verbose=verbose))
-        self.assertTrue(GPy.kern.Kern_check_dKdiag_dtheta(kern).checkgrad(verbose=verbose))
-        self.assertTrue(GPy.kern.Kern_check_dK_dX(kern).checkgrad(verbose=verbose))
+        kern = GPy.kern.rbf(5)
+        self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
+
+    def test_rbf_invkernel(self):
+        kern = GPy.kern.rbf_inv(5)
+        self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
+
+    def test_Matern32kernel(self):
+        kern = GPy.kern.Matern32(5)
+        self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
+
+    def test_Matern52kernel(self):
+        kern = GPy.kern.Matern52(5)
+        self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
+
+    def test_linearkernel(self):
+        kern = GPy.kern.linear(5)
+        self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
+
+    def test_periodic_exponentialkernel(self):
+        kern = GPy.kern.periodic_exponential(1)
+        self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
+
+    def test_periodic_Matern32kernel(self):
+        kern = GPy.kern.periodic_Matern32(1)
+        self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
+
+    def test_periodic_Matern52kernel(self):
+        kern = GPy.kern.periodic_Matern52(1)
+        self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
+
+    def test_rational_quadratickernel(self):
+        kern = GPy.kern.rational_quadratic(1)
+        self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
+
+    def test_gibbskernel(self):
+        verbose = False
+        kern = GPy.kern.gibbs(5, mapping=GPy.mappings.Linear(5, 1))
+        self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
+
+    def test_heterokernel(self):
+        kern = GPy.kern.hetero(5, mapping=GPy.mappings.Linear(5, 1), transform=GPy.core.transformations.logexp())
+        self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
+
+    def test_mlpkernel(self):
+        verbose = False
+        kern = GPy.kern.mlp(5)
+        self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
+
+    def test_polykernel(self):
+        verbose = False
+        kern = GPy.kern.poly(5, degree=4)
+        self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))

@@ -48,9 +77,8 @@ class KernelTests(unittest.TestCase):
         X = np.random.rand(30, 4)
         K = np.dot(X, X.T)
         kernel = GPy.kern.fixed(4, K)
-        Y = np.ones((30,1))
-        m = GPy.models.GPRegression(X,Y,kernel=kernel)
-        self.assertTrue(m.checkgrad())
+        kern = GPy.kern.poly(5, degree=4)
+        self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))

     def test_coregionalisation(self):
         X1 = np.random.rand(50,1)*8

@@ -63,9 +91,8 @@ class KernelTests(unittest.TestCase):

         k1 = GPy.kern.rbf(1) + GPy.kern.bias(1)
         k2 = GPy.kern.coregionalise(2,1)
-        k = k1.prod(k2,tensor=True)
-        m = GPy.models.GPRegression(X,Y,kernel=k)
-        self.assertTrue(m.checkgrad())
+        kern = k1**k2
+        self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
