mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-05-24 14:15:14 +02:00
rename dK_dtheta > gradients_X
This commit is contained in:
parent
6a1c700c03
commit
b944427733
35 changed files with 136 additions and 129 deletions
|
|
@ -8,6 +8,7 @@ from parts.prod import Prod as prod
|
|||
from parts.linear import Linear
|
||||
from parts.kernpart import Kernpart
|
||||
from ..core.parameterization import Parameterized
|
||||
from GPy.core.parameterization.param import Param
|
||||
|
||||
class kern(Parameterized):
|
||||
def __init__(self, input_dim, parts=[], input_slices=None):
|
||||
|
|
@ -84,7 +85,7 @@ class kern(Parameterized):
|
|||
# represents the gradient in the transformed space (i.e. that given by
|
||||
# get_params_transformed())
|
||||
#
|
||||
# :param g: the gradient vector for the current model, usually created by dK_dtheta
|
||||
# :param g: the gradient vector for the current model, usually created by _param_grad_helper
|
||||
# """
|
||||
# x = self._get_params()
|
||||
# [np.place(g, index, g[index] * constraint.gradfactor(x[index]))
|
||||
|
|
@ -294,7 +295,7 @@ class kern(Parameterized):
|
|||
def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
|
||||
[p.update_gradients_variational(dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z) for p in self._parameters_]
|
||||
|
||||
def dK_dtheta(self, dL_dK, X, X2=None):
|
||||
def _param_grad_helper(self, dL_dK, X, X2=None):
|
||||
"""
|
||||
Compute the gradient of the covariance function with respect to the parameters.
|
||||
|
||||
|
|
@ -310,9 +311,9 @@ class kern(Parameterized):
|
|||
assert X.shape[1] == self.input_dim
|
||||
target = np.zeros(self.size)
|
||||
if X2 is None:
|
||||
[p.dK_dtheta(dL_dK, X[:, i_s], None, target[ps]) for p, i_s, ps, in zip(self._parameters_, self.input_slices, self._param_slices_)]
|
||||
[p._param_grad_helper(dL_dK, X[:, i_s], None, target[ps]) for p, i_s, ps, in zip(self._parameters_, self.input_slices, self._param_slices_)]
|
||||
else:
|
||||
[p.dK_dtheta(dL_dK, X[:, i_s], X2[:, i_s], target[ps]) for p, i_s, ps, in zip(self._parameters_, self.input_slices, self._param_slices_)]
|
||||
[p._param_grad_helper(dL_dK, X[:, i_s], X2[:, i_s], target[ps]) for p, i_s, ps, in zip(self._parameters_, self.input_slices, self._param_slices_)]
|
||||
|
||||
return self._transform_gradients(target)
|
||||
|
||||
|
|
@ -484,6 +485,7 @@ from GPy.core.model import Model
|
|||
class Kern_check_model(Model):
|
||||
"""This is a dummy model class used as a base class for checking that the gradients of a given kernel are implemented correctly. It enables checkgradient() to be called independently on a kernel."""
|
||||
def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
|
||||
Model.__init__(self, 'kernel_test_model')
|
||||
num_samples = 20
|
||||
num_samples2 = 10
|
||||
if kernel==None:
|
||||
|
|
@ -495,14 +497,12 @@ class Kern_check_model(Model):
|
|||
dL_dK = np.ones((X.shape[0], X.shape[0]))
|
||||
else:
|
||||
dL_dK = np.ones((X.shape[0], X2.shape[0]))
|
||||
|
||||
|
||||
self.kernel=kernel
|
||||
self.add_parameter(kernel)
|
||||
self.X = X
|
||||
self.X2 = X2
|
||||
self.dL_dK = dL_dK
|
||||
#self.constrained_indices=[]
|
||||
#self.constraints=[]
|
||||
Model.__init__(self, 'kernel_test_model')
|
||||
|
||||
def is_positive_definite(self):
|
||||
v = np.linalg.eig(self.kernel.K(self.X))[0]
|
||||
|
|
@ -511,15 +511,6 @@ class Kern_check_model(Model):
|
|||
else:
|
||||
return True
|
||||
|
||||
def _get_params(self):
|
||||
return self.kernel._get_params()
|
||||
|
||||
def _get_param_names(self):
|
||||
return self.kernel._get_param_names()
|
||||
|
||||
def _set_params(self, x):
|
||||
self.kernel._set_params(x)
|
||||
|
||||
def log_likelihood(self):
|
||||
return (self.dL_dK*self.kernel.K(self.X, self.X2)).sum()
|
||||
|
||||
|
|
@ -532,7 +523,7 @@ class Kern_check_dK_dtheta(Kern_check_model):
|
|||
Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=X2)
|
||||
|
||||
def _log_likelihood_gradients(self):
|
||||
return self.kernel.dK_dtheta(self.dL_dK, self.X, self.X2)
|
||||
return self.kernel._param_grad_helper(self.dL_dK, self.X, self.X2)
|
||||
|
||||
class Kern_check_dKdiag_dtheta(Kern_check_model):
|
||||
"""This class allows gradient checks of the gradient of the diagonal of a kernel with respect to the parameters."""
|
||||
|
|
@ -540,6 +531,8 @@ class Kern_check_dKdiag_dtheta(Kern_check_model):
|
|||
Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=None)
|
||||
if dL_dK==None:
|
||||
self.dL_dK = np.ones((self.X.shape[0]))
|
||||
def parameters_changed(self):
|
||||
self.kernel.update_gradients_full(self.dL_dK, self.X)
|
||||
|
||||
def log_likelihood(self):
|
||||
return (self.dL_dK*self.kernel.Kdiag(self.X)).sum()
|
||||
|
|
@ -551,41 +544,25 @@ class Kern_check_dK_dX(Kern_check_model):
|
|||
"""This class allows gradient checks for the gradient of a kernel with respect to X. """
|
||||
def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
|
||||
Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=X2)
|
||||
|
||||
self.remove_parameter(kernel)
|
||||
self.X = Param('X', self.X)
|
||||
self.add_parameter(self.X)
|
||||
def _log_likelihood_gradients(self):
|
||||
return self.kernel.gradients_X(self.dL_dK, self.X, self.X2).flatten()
|
||||
|
||||
def _get_param_names(self):
|
||||
return ['X_' +str(i) + ','+str(j) for j in range(self.X.shape[1]) for i in range(self.X.shape[0])]
|
||||
|
||||
def _get_params(self):
|
||||
return self.X.flatten()
|
||||
|
||||
def _set_params(self, x):
|
||||
self.X=x.reshape(self.X.shape)
|
||||
|
||||
class Kern_check_dKdiag_dX(Kern_check_model):
|
||||
class Kern_check_dKdiag_dX(Kern_check_dK_dX):
|
||||
"""This class allows gradient checks for the gradient of a kernel diagonal with respect to X. """
|
||||
def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
|
||||
Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=None)
|
||||
Kern_check_dK_dX.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=None)
|
||||
if dL_dK==None:
|
||||
self.dL_dK = np.ones((self.X.shape[0]))
|
||||
|
||||
|
||||
def log_likelihood(self):
|
||||
return (self.dL_dK*self.kernel.Kdiag(self.X)).sum()
|
||||
|
||||
def _log_likelihood_gradients(self):
|
||||
return self.kernel.dKdiag_dX(self.dL_dK, self.X).flatten()
|
||||
|
||||
def _get_param_names(self):
|
||||
return ['X_' +str(i) + ','+str(j) for j in range(self.X.shape[1]) for i in range(self.X.shape[0])]
|
||||
|
||||
def _get_params(self):
|
||||
return self.X.flatten()
|
||||
|
||||
def _set_params(self, x):
|
||||
self.X=x.reshape(self.X.shape)
|
||||
|
||||
def kern_test(kern, X=None, X2=None, output_ind=None, verbose=False):
|
||||
"""
|
||||
This function runs on kernels to check the correctness of their
|
||||
|
|
|
|||
|
|
@ -43,7 +43,7 @@ class Brownian(Kernpart):
|
|||
def Kdiag(self,X,target):
|
||||
target += self.variance*X.flatten()
|
||||
|
||||
def dK_dtheta(self,dL_dK,X,X2,target):
|
||||
def _param_grad_helper(self,dL_dK,X,X2,target):
|
||||
if X2 is None:
|
||||
X2 = X
|
||||
target += np.sum(np.fmin(X,X2.T)*dL_dK)
|
||||
|
|
|
|||
|
|
@ -74,7 +74,7 @@ class Matern32(Kernpart):
|
|||
"""Compute the diagonal of the covariance matrix associated to X."""
|
||||
np.add(target, self.variance, target)
|
||||
|
||||
def dK_dtheta(self, dL_dK, X, X2, target):
|
||||
def _param_grad_helper(self, dL_dK, X, X2, target):
|
||||
"""derivative of the covariance matrix with respect to the parameters."""
|
||||
if X2 is None: X2 = X
|
||||
dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1))
|
||||
|
|
|
|||
|
|
@ -74,7 +74,7 @@ class Matern52(Kernpart):
|
|||
"""Compute the diagonal of the covariance matrix associated to X."""
|
||||
np.add(target,self.variance,target)
|
||||
|
||||
def dK_dtheta(self,dL_dK,X,X2,target):
|
||||
def _param_grad_helper(self,dL_dK,X,X2,target):
|
||||
"""derivative of the covariance matrix with respect to the parameters."""
|
||||
if X2 is None: X2 = X
|
||||
dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1))
|
||||
|
|
|
|||
|
|
@ -90,7 +90,7 @@ class ODE_1(Kernpart):
|
|||
|
||||
np.add(self.varianceU*self.varianceY*(k1+k2+k3), target, target)
|
||||
|
||||
def dK_dtheta(self, dL_dK, X, X2, target):
|
||||
def _param_grad_helper(self, dL_dK, X, X2, target):
|
||||
"""derivative of the covariance matrix with respect to the parameters."""
|
||||
if X2 is None: X2 = X
|
||||
dist = np.abs(X - X2.T)
|
||||
|
|
|
|||
|
|
@ -124,7 +124,7 @@ class Eq_ode1(Kernpart):
|
|||
#target += np.diag(self.B)[np.asarray(index,dtype=np.int).flatten()]
|
||||
pass
|
||||
|
||||
def dK_dtheta(self,dL_dK,X,X2,target):
|
||||
def _param_grad_helper(self,dL_dK,X,X2,target):
|
||||
|
||||
# First extract times and indices.
|
||||
self._extract_t_indices(X, X2, dL_dK=dL_dK)
|
||||
|
|
|
|||
|
|
@ -75,7 +75,7 @@ class Exponential(Kernpart):
|
|||
"""Compute the diagonal of the covariance matrix associated to X."""
|
||||
np.add(target, self.variance, target)
|
||||
|
||||
def dK_dtheta(self, dL_dK, X, X2, target):
|
||||
def _param_grad_helper(self, dL_dK, X, X2, target):
|
||||
"""derivative of the covariance matrix with respect to the parameters."""
|
||||
if X2 is None: X2 = X
|
||||
dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1))
|
||||
|
|
|
|||
|
|
@ -50,7 +50,7 @@ class FiniteDimensional(Kernpart):
|
|||
def Kdiag(self,X,target):
|
||||
product = np.diag(self.K(X, X))
|
||||
np.add(target,product,target)
|
||||
def dK_dtheta(self,X,X2,target):
|
||||
def _param_grad_helper(self,X,X2,target):
|
||||
"""Return shape is NxMx(Ntheta)"""
|
||||
if X2 is None: X2 = X
|
||||
FX = np.column_stack([f(X) for f in self.F])
|
||||
|
|
|
|||
|
|
@ -31,7 +31,7 @@ class Fixed(Kernpart):
|
|||
def K(self, X, X2, target):
|
||||
target += self.variance * self.fixed_K
|
||||
|
||||
def dK_dtheta(self, partial, X, X2, target):
|
||||
def _param_grad_helper(self, partial, X, X2, target):
|
||||
target += (partial * self.fixed_K).sum()
|
||||
|
||||
def gradients_X(self, partial, X, X2, target):
|
||||
|
|
|
|||
|
|
@ -85,7 +85,7 @@ class Gibbs(Kernpart):
|
|||
"""Compute the diagonal of the covariance matrix for X."""
|
||||
np.add(target, self.variance, target)
|
||||
|
||||
def dK_dtheta(self, dL_dK, X, X2, target):
|
||||
def _param_grad_helper(self, dL_dK, X, X2, target):
|
||||
"""Derivative of the covariance with respect to the parameters."""
|
||||
self._K_computations(X, X2)
|
||||
self._dK_computations(dL_dK)
|
||||
|
|
|
|||
|
|
@ -80,7 +80,7 @@ class Hetero(Kernpart):
|
|||
"""Helper function for computing the diagonal elements of the covariance."""
|
||||
return self.mapping.f(X).flatten()**2
|
||||
|
||||
def dK_dtheta(self, dL_dK, X, X2, target):
|
||||
def _param_grad_helper(self, dL_dK, X, X2, target):
|
||||
"""Derivative of the covariance with respect to the parameters."""
|
||||
if (X2 is None) or (X2 is X):
|
||||
dL_dKdiag = dL_dK.flat[::dL_dK.shape[0]+1]
|
||||
|
|
|
|||
|
|
@ -50,9 +50,9 @@ class Hierarchical(Kernpart):
|
|||
#X,slices = X[:,:-1],index_to_slices(X[:,-1])
|
||||
#[[self.k.Kdiag(X[s],target[s]) for s in slices_i] for slices_i in slices]
|
||||
|
||||
def dK_dtheta(self,dL_dK,X,X2,target):
|
||||
def _param_grad_helper(self,dL_dK,X,X2,target):
|
||||
X, X2, slices, slices2 = self._sort_slices(X,X2)
|
||||
[[[[k.dK_dtheta(dL_dK[s,s2],X[s],X2[s2],target[p_start:p_stop]) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices_, slices2_)] for k, p_start, p_stop, slices_, slices2_ in zip(self.parts, self.param_starts, self.param_stops, slices, slices2)]
|
||||
[[[[k._param_grad_helper(dL_dK[s,s2],X[s],X2[s2],target[p_start:p_stop]) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices_, slices2_)] for k, p_start, p_stop, slices_, slices2_ in zip(self.parts, self.param_starts, self.param_stops, slices, slices2)]
|
||||
|
||||
|
||||
def gradients_X(self,dL_dK,X,X2,target):
|
||||
|
|
|
|||
|
|
@ -70,13 +70,13 @@ class IndependentOutputs(Kernpart):
|
|||
X,slices = X[:,:-1],index_to_slices(X[:,-1])
|
||||
[[self.k.Kdiag(X[s],target[s]) for s in slices_i] for slices_i in slices]
|
||||
|
||||
def dK_dtheta(self,dL_dK,X,X2,target):
|
||||
def _param_grad_helper(self,dL_dK,X,X2,target):
|
||||
X,slices = X[:,:-1],index_to_slices(X[:,-1])
|
||||
if X2 is None:
|
||||
X2,slices2 = X,slices
|
||||
else:
|
||||
X2,slices2 = X2[:,:-1],index_to_slices(X2[:,-1])
|
||||
[[[self.k.dK_dtheta(dL_dK[s,s2],X[s],X2[s2],target) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]
|
||||
[[[self.k._param_grad_helper(dL_dK[s,s2],X[s],X2[s2],target) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]
|
||||
|
||||
|
||||
def gradients_X(self,dL_dK,X,X2,target):
|
||||
|
|
|
|||
|
|
@ -75,14 +75,14 @@ class Kernpart(Parameterized):
|
|||
raise NotImplementedError
|
||||
def Kdiag(self,X,target):
|
||||
raise NotImplementedError
|
||||
def dK_dtheta(self,dL_dK,X,X2,target):
|
||||
def _param_grad_helper(self,dL_dK,X,X2,target):
|
||||
raise NotImplementedError
|
||||
def dKdiag_dtheta(self,dL_dKdiag,X,target):
|
||||
# In the base case compute this by calling dK_dtheta. Need to
|
||||
# In the base case compute this by calling _param_grad_helper. Need to
|
||||
# override for stationary covariances (for example) to save
|
||||
# time.
|
||||
for i in range(X.shape[0]):
|
||||
self.dK_dtheta(dL_dKdiag[i], X[i, :][None, :], X2=None, target=target)
|
||||
self._param_grad_helper(dL_dKdiag[i], X[i, :][None, :], X2=None, target=target)
|
||||
def psi0(self,Z,mu,S,target):
|
||||
raise NotImplementedError
|
||||
def dpsi0_dtheta(self,dL_dpsi0,Z,mu,S,target):
|
||||
|
|
@ -109,8 +109,15 @@ class Kernpart(Parameterized):
|
|||
raise NotImplementedError
|
||||
def dKdiag_dX(self, dL_dK, X, target):
|
||||
raise NotImplementedError
|
||||
|
||||
|
||||
def update_gradients_full(self, dL_dK, X):
|
||||
"""Set the gradients of all parameters when doing full (N) inference."""
|
||||
raise NotImplementedError
|
||||
def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
|
||||
"""Set the gradients of all parameters when doing sparse (M) inference."""
|
||||
raise NotImplementedError
|
||||
def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
|
||||
"""Set the gradients of all parameters when doing variational (M) inference with uncertain inputs."""
|
||||
raise NotImplementedError
|
||||
|
||||
class Kernpart_stationary(Kernpart):
|
||||
def __init__(self, input_dim, lengthscale=None, ARD=False):
|
||||
|
|
|
|||
|
|
@ -77,7 +77,7 @@ class MLP(Kernpart):
|
|||
self._K_diag_computations(X)
|
||||
target+= self.variance*self._K_diag_dvar
|
||||
|
||||
def dK_dtheta(self, dL_dK, X, X2, target):
|
||||
def _param_grad_helper(self, dL_dK, X, X2, target):
|
||||
"""Derivative of the covariance with respect to the parameters."""
|
||||
self._K_computations(X, X2)
|
||||
denom3 = self._K_denom*self._K_denom*self._K_denom
|
||||
|
|
|
|||
|
|
@ -112,7 +112,7 @@ class PeriodicMatern32(Kernpart):
|
|||
np.add(target,np.diag(mdot(FX,self.Gi,FX.T)),target)
|
||||
|
||||
@silence_errors
|
||||
def dK_dtheta(self,dL_dK,X,X2,target):
|
||||
def _param_grad_helper(self,dL_dK,X,X2,target):
|
||||
"""derivative of the covariance matrix with respect to the parameters (shape is num_data x num_inducing x num_params)"""
|
||||
if X2 is None: X2 = X
|
||||
FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
|
||||
|
|
|
|||
|
|
@ -114,7 +114,7 @@ class PeriodicMatern52(Kernpart):
|
|||
np.add(target,np.diag(mdot(FX,self.Gi,FX.T)),target)
|
||||
|
||||
@silence_errors
|
||||
def dK_dtheta(self,dL_dK,X,X2,target):
|
||||
def _param_grad_helper(self,dL_dK,X,X2,target):
|
||||
"""derivative of the covariance matrix with respect to the parameters (shape is num_data x num_inducing x num_params)"""
|
||||
if X2 is None: X2 = X
|
||||
FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
|
||||
|
|
|
|||
|
|
@ -110,7 +110,7 @@ class PeriodicExponential(Kernpart):
|
|||
np.add(target,np.diag(mdot(FX,self.Gi,FX.T)),target)
|
||||
|
||||
@silence_errors
|
||||
def dK_dtheta(self,dL_dK,X,X2,target):
|
||||
def _param_grad_helper(self,dL_dK,X,X2,target):
|
||||
"""derivative of the covariance matrix with respect to the parameters (shape is N x num_inducing x num_params)"""
|
||||
if X2 is None: X2 = X
|
||||
FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
|
||||
|
|
|
|||
|
|
@ -86,7 +86,7 @@ class POLY(Kernpart):
|
|||
self._K_diag_computations(X)
|
||||
target+= self.variance*self._K_diag_dvar
|
||||
|
||||
def dK_dtheta(self, dL_dK, X, X2, target):
|
||||
def _param_grad_helper(self, dL_dK, X, X2, target):
|
||||
"""Derivative of the covariance with respect to the parameters."""
|
||||
self._K_computations(X, X2)
|
||||
base = self.variance*self.degree*self._K_poly_arg**(self.degree-1)
|
||||
|
|
|
|||
|
|
@ -54,15 +54,15 @@ class Prod(Kernpart):
|
|||
self.k1.update_gradients_full(dL_dK*self._K2, X[:,self.slice1])
|
||||
self.k2.update_gradients_full(dL_dK*self._K1, X[:,self.slice2])
|
||||
|
||||
def dK_dtheta(self,dL_dK,X,X2,target):
|
||||
def _param_grad_helper(self,dL_dK,X,X2,target):
|
||||
"""Derivative of the covariance matrix with respect to the parameters."""
|
||||
self._K_computations(X,X2)
|
||||
if X2 is None:
|
||||
self.k1.dK_dtheta(dL_dK*self._K2, X[:,self.slice1], None, target[:self.k1.num_params])
|
||||
self.k2.dK_dtheta(dL_dK*self._K1, X[:,self.slice2], None, target[self.k1.num_params:])
|
||||
self.k1._param_grad_helper(dL_dK*self._K2, X[:,self.slice1], None, target[:self.k1.num_params])
|
||||
self.k2._param_grad_helper(dL_dK*self._K1, X[:,self.slice2], None, target[self.k1.num_params:])
|
||||
else:
|
||||
self.k1.dK_dtheta(dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target[:self.k1.num_params])
|
||||
self.k2.dK_dtheta(dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target[self.k1.num_params:])
|
||||
self.k1._param_grad_helper(dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target[:self.k1.num_params])
|
||||
self.k2._param_grad_helper(dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target[self.k1.num_params:])
|
||||
|
||||
def Kdiag(self,X,target):
|
||||
"""Compute the diagonal of the covariance matrix associated to X."""
|
||||
|
|
|
|||
|
|
@ -41,15 +41,15 @@ class prod_orthogonal(Kernpart):
|
|||
self._K_computations(X,X2)
|
||||
target += self._K1 * self._K2
|
||||
|
||||
def dK_dtheta(self,dL_dK,X,X2,target):
|
||||
def _param_grad_helper(self,dL_dK,X,X2,target):
|
||||
"""derivative of the covariance matrix with respect to the parameters."""
|
||||
self._K_computations(X,X2)
|
||||
if X2 is None:
|
||||
self.k1.dK_dtheta(dL_dK*self._K2, X[:,:self.k1.input_dim], None, target[:self.k1.num_params])
|
||||
self.k2.dK_dtheta(dL_dK*self._K1, X[:,self.k1.input_dim:], None, target[self.k1.num_params:])
|
||||
self.k1._param_grad_helper(dL_dK*self._K2, X[:,:self.k1.input_dim], None, target[:self.k1.num_params])
|
||||
self.k2._param_grad_helper(dL_dK*self._K1, X[:,self.k1.input_dim:], None, target[self.k1.num_params:])
|
||||
else:
|
||||
self.k1.dK_dtheta(dL_dK*self._K2, X[:,:self.k1.input_dim], X2[:,:self.k1.input_dim], target[:self.k1.num_params])
|
||||
self.k2.dK_dtheta(dL_dK*self._K1, X[:,self.k1.input_dim:], X2[:,self.k1.input_dim:], target[self.k1.num_params:])
|
||||
self.k1._param_grad_helper(dL_dK*self._K2, X[:,:self.k1.input_dim], X2[:,:self.k1.input_dim], target[:self.k1.num_params])
|
||||
self.k2._param_grad_helper(dL_dK*self._K1, X[:,self.k1.input_dim:], X2[:,self.k1.input_dim:], target[self.k1.num_params:])
|
||||
|
||||
def Kdiag(self,X,target):
|
||||
"""Compute the diagonal of the covariance matrix associated to X."""
|
||||
|
|
|
|||
|
|
@ -52,7 +52,7 @@ class RationalQuadratic(Kernpart):
|
|||
def Kdiag(self,X,target):
|
||||
target += self.variance
|
||||
|
||||
def dK_dtheta(self,dL_dK,X,X2,target):
|
||||
def _param_grad_helper(self,dL_dK,X,X2,target):
|
||||
if X2 is None: X2 = X
|
||||
dist2 = np.square((X-X2.T)/self.lengthscale)
|
||||
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@ from kernpart import Kernpart
|
|||
from ...util.linalg import tdot
|
||||
from ...util.misc import fast_array_equal, param_to_array
|
||||
from ...core.parameterization import Param
|
||||
from ...core.parameterization.transformations import Logexp
|
||||
|
||||
class RBF(Kernpart):
|
||||
"""
|
||||
|
|
@ -50,7 +51,7 @@ class RBF(Kernpart):
|
|||
else:
|
||||
lengthscale = np.ones(self.input_dim)
|
||||
|
||||
self.variance = Param('variance', variance)
|
||||
self.variance = Param('variance', variance, Logexp())
|
||||
|
||||
self.lengthscale = Param('lengthscale', lengthscale)
|
||||
self.lengthscale.add_observer(self, self.update_lengthscale)
|
||||
|
|
@ -141,7 +142,7 @@ class RBF(Kernpart):
|
|||
d_length = self._psi1[:,:,None] * ((self._psi1_dist_sq - 1.)/(self.lengthscale*self._psi1_denom) +1./self.lengthscale)
|
||||
dpsi1_dlength = d_length * dL_dpsi1[:, :, None]
|
||||
if not self.ARD:
|
||||
self.lengthscale.gradeint = dpsi1_dlength.sum()
|
||||
self.lengthscale.gradient = dpsi1_dlength.sum()
|
||||
else:
|
||||
self.lengthscale.gradient = dpsi1_dlength.sum(0).sum(0)
|
||||
|
||||
|
|
|
|||
|
|
@ -97,7 +97,7 @@ class RBFInv(RBF):
|
|||
# return ['variance'] + ['inv_lengthscale%i' % i for i in range(self.inv_lengthscale.size)]
|
||||
|
||||
# TODO: Rewrite computations so that lengthscale is not needed (but only inv. lengthscale)
|
||||
def dK_dtheta(self, dL_dK, X, X2, target):
|
||||
def _param_grad_helper(self, dL_dK, X, X2, target):
|
||||
self._K_computations(X, X2)
|
||||
target[0] += np.sum(self._K_dvar * dL_dK)
|
||||
if self.ARD:
|
||||
|
|
|
|||
|
|
@ -73,7 +73,7 @@ class RBFCos(Kernpart):
|
|||
def Kdiag(self,X,target):
|
||||
np.add(target,self.variance,target)
|
||||
|
||||
def dK_dtheta(self,dL_dK,X,X2,target):
|
||||
def _param_grad_helper(self,dL_dK,X,X2,target):
|
||||
self._K_computations(X,X2)
|
||||
target[0] += np.sum(dL_dK*self._dvar)
|
||||
if self.ARD:
|
||||
|
|
|
|||
|
|
@ -50,7 +50,7 @@ class Spline(Kernpart):
|
|||
def Kdiag(self,X,target):
|
||||
target += self.variance*X.flatten()**3/3.
|
||||
|
||||
def dK_dtheta(self,X,X2,target):
|
||||
def _param_grad_helper(self,X,X2,target):
|
||||
target += 0.5*(t*s**2) - s**3/6. + (s_t)**3*theta(s_t)/6.
|
||||
|
||||
def dKdiag_dtheta(self,X,target):
|
||||
|
|
|
|||
|
|
@ -40,7 +40,7 @@ class Symmetric(Kernpart):
|
|||
self.k.K(X,AX2,target)
|
||||
self.k.K(AX,AX2,target)
|
||||
|
||||
def dK_dtheta(self,dL_dK,X,X2,target):
|
||||
def _param_grad_helper(self,dL_dK,X,X2,target):
|
||||
"""derivative of the covariance matrix with respect to the parameters."""
|
||||
AX = np.dot(X,self.transform)
|
||||
if X2 is None:
|
||||
|
|
@ -48,10 +48,10 @@ class Symmetric(Kernpart):
|
|||
ZX2 = AX
|
||||
else:
|
||||
AX2 = np.dot(X2, self.transform)
|
||||
self.k.dK_dtheta(dL_dK,X,X2,target)
|
||||
self.k.dK_dtheta(dL_dK,AX,X2,target)
|
||||
self.k.dK_dtheta(dL_dK,X,AX2,target)
|
||||
self.k.dK_dtheta(dL_dK,AX,AX2,target)
|
||||
self.k._param_grad_helper(dL_dK,X,X2,target)
|
||||
self.k._param_grad_helper(dL_dK,AX,X2,target)
|
||||
self.k._param_grad_helper(dL_dK,X,AX2,target)
|
||||
self.k._param_grad_helper(dL_dK,AX,AX2,target)
|
||||
|
||||
|
||||
def gradients_X(self,dL_dK,X,X2,target):
|
||||
|
|
|
|||
|
|
@ -348,7 +348,7 @@ class spkern(Kernpart):
|
|||
def Kdiag(self,X,target):
|
||||
self._weave_inline(self._Kdiag_code, X, target)
|
||||
|
||||
def dK_dtheta(self,partial,X,Z,target):
|
||||
def _param_grad_helper(self,partial,X,Z,target):
|
||||
if Z is None:
|
||||
self._weave_inline(self._dK_dtheta_code_X, X, target, Z, partial)
|
||||
else:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue