mirror of https://github.com/SheffieldML/GPy.git (synced 2026-05-08 03:22:38 +02:00)

commit 7b5e8a9ffc (parent ad5a967b39)

    changes to rbf and white to allow new parameter gradient structure

6 changed files with 43 additions and 46 deletions

@@ -24,7 +24,7 @@ class ExactGaussianInference(object):
         """
         find a matrix L which satisfies LL^T = YY^T.

         Note that L may have fewer columns than Y, else L=Y.
         """
         N, D = Y.shape
         if (N>D):

@@ -33,22 +33,26 @@ class ExactGaussianInference(object):
         #if Y in self.cache, return self.Cache[Y], else store Y in cache and return L.
         raise NotImplementedError, 'TODO' #TODO

-    def inference(self, K, likelihood, Y, Y_metadata=None):
+    def inference(self, kern, X, likelihood, Y, Y_metadata=None):
         """
         Returns a Posterior class containing essential quantities of the posterior
         """
         YYT_factor = self.get_YYTfactor(Y)

+        K = kern.K(X)
+
         Wi, LW, LWi, W_logdet = pdinv(K + likelihood.covariance_matrix(Y, Y_metadata))

         alpha, _ = dpotrs(LW, YYT_factor, lower=1)

-        dL_dK = 0.5 * (tdot(alpha) - Y.shape[1] * Wi)
-
         log_marginal = 0.5*(-Y.size * log_2_pi - Y.shape[1] * W_logdet - np.sum(alpha * YYT_factor))

-        dL_dtheta_lik = likelihood._gradients(np.diag(dL_dK))
+        dL_dK = 0.5 * (tdot(alpha) - Y.shape[1] * Wi)

-        return Posterior(log_marginal, dL_dK, dL_dtheta_lik, LW, alpha, K)
+        kern.update_gradients_full(dL_dK)
+
+        likelihood.update_gradients(np.diag(dL_dK))
+
+        return Posterior(log_marginal, dL_dK, LW, alpha, K)

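Note: this hunk is the core of the new structure. inference() no longer receives a precomputed K and no longer returns dL_dtheta_lik; it takes the kernel and inputs, computes K = kern.K(X) itself, and pushes gradients onto the parameter objects through kern.update_gradients_full(dL_dK) and likelihood.update_gradients(np.diag(dL_dK)). A minimal numpy/scipy sketch of the quantities involved, with pdinv/dpotrs/tdot replaced by a plain Cholesky factorisation (the toy data, RBF form and noise value are illustrative assumptions, not GPy code):

    import numpy as np
    from scipy.linalg import cho_factor, cho_solve

    # Toy data: N = 5 points, one output column (so YYT_factor == Y here).
    rng = np.random.default_rng(0)
    X = np.linspace(0, 1, 5)[:, None]
    Y = np.sin(6 * X) + 0.1 * rng.standard_normal(X.shape)

    # Stand-in for kern.K(X): RBF with unit variance and lengthscale.
    K = np.exp(-0.5 * (X - X.T) ** 2)

    # Stand-in for likelihood.covariance_matrix(...): noise * I.
    W = K + 0.1 * np.eye(X.shape[0])

    # pdinv/dpotrs boil down to a Cholesky factorisation of W.
    LW, lower = cho_factor(W, lower=True)
    alpha = cho_solve((LW, lower), Y)              # alpha = W^{-1} Y
    W_logdet = 2 * np.sum(np.log(np.diag(LW)))

    log_2_pi = np.log(2 * np.pi)
    log_marginal = 0.5 * (-Y.size * log_2_pi
                          - Y.shape[1] * W_logdet
                          - np.sum(alpha * Y))

    # dL_dK = 0.5 * (tdot(alpha) - D * W^{-1}), exactly as in the hunk;
    # this is what gets handed to kern.update_gradients_full(dL_dK).
    Wi = cho_solve((LW, lower), np.eye(X.shape[0]))
    dL_dK = 0.5 * (alpha @ alpha.T - Y.shape[1] * Wi)
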
@@ -17,7 +17,7 @@ from ..util.linalg import mdot, jitchol, pddet, dpotrs
 from functools import partial as partial_func
 import warnings

-class Laplace(likelihood):
+class LaplaceInference(object):
     """Laplace approximation to a posterior"""

     def __init__(self, data, noise_model, extra_data=None):

@@ -14,7 +14,7 @@ class Posterior(object):
     schemes and the model classes.

     """
-    def __init__(self, log_marginal, dL_dK, dL_dtheta_lik, woodbury_chol=None, woodbury_vector=None, K=None, mean=None, cov=None, K_chol=None):
+    def __init__(self, log_marginal, dL_dK, woodbury_chol=None, woodbury_vector=None, K=None, mean=None, cov=None, K_chol=None):
         """
         log_marginal: log p(Y|X)
         dL_dK: d/dK log p(Y|X)

@@ -56,7 +56,7 @@ class Posterior(object):
         if ((woodbury_chol is not None) and (woodbury_vector is not None) and (K is not None)) or ((mean is not None) and (cov is not None) and (K is not None)):
             pass # we have sufficient to compute the posterior
         else:
-            raise ValueError, "insufficient onformation to compute the posterior"
+            raise ValueError, "insufficient information to compute the posterior"

         #option 1:
         self._woodbury_chol = woodbury_chol

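Note: the woodbury_chol/woodbury_vector pair named here is exactly what inference() returns (LW and alpha), and it is sufficient for prediction: with L L^T = K + Sigma and alpha = (K + Sigma)^{-1} Y, the predictive mean at test points is K_*f alpha and the covariance is K_** - K_*f (K + Sigma)^{-1} K_f*. A sketch of those standard equations (not the Posterior class's own methods):

    import numpy as np
    from scipy.linalg import solve_triangular

    def gp_predict(Kstar_f, Kstar_star, woodbury_chol, woodbury_vector):
        # woodbury_chol:   lower-triangular L with L L^T = K + Sigma_noise
        # woodbury_vector: alpha = (K + Sigma_noise)^{-1} Y
        mean = Kstar_f @ woodbury_vector
        tmp = solve_triangular(woodbury_chol, Kstar_f.T, lower=True)
        cov = Kstar_star - tmp.T @ tmp               # K** - K*f W^{-1} Kf*
        return mean, cov
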
@@ -80,6 +80,9 @@ class RBF(Kernpart):
         self._X, self._X2 = np.empty(shape=(2, 1))
         self._Z, self._mu, self._S = np.empty(shape=(3, 1)) # cached versions of Z,mu,S

+
+
+
     def K(self, X, X2, target):
         self._K_computations(X, X2)
         target += self.variance * self._K_dvar

@@ -87,6 +90,20 @@ class RBF(Kernpart):
     def Kdiag(self, X, target):
         np.add(target, self.variance, target)

+    def psi0(self, Z, mu, S, target):
+        target += self.variance
+
+    def psi1(self, Z, mu, S, target):
+        self._psi_computations(Z, mu, S)
+        target += self._psi1
+
+    def psi2(self, Z, mu, S, target):
+        self._psi_computations(Z, mu, S)
+        target += self._psi2
+
     def update_gradients_full(self, dL_dK, X):
         self._K_computations(X, X2)
         self.variance.gradient = np.sum(self._K_dvar * dL_dK)

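Note: psi0/psi1/psi2 are the kernel expectations used by the variational sparse-GP bound: psi0_n = <k(x_n, x_n)>, psi1_nm = <k(x_n, Z_m)>, and psi2 the corresponding second moment, with expectations taken under q(x_n) = N(mu_n, S_n). For the RBF kernel these have closed forms; a sketch of psi0 and psi1 in one input dimension, following the standard expressions rather than GPy's cached _psi_computations:

    import numpy as np

    def rbf_psi01(variance, lengthscale, Z, mu, S):
        l2 = lengthscale ** 2
        # psi0_n = <k(x_n, x_n)> = variance, independent of mu and S,
        # which is why psi0() above just adds self.variance.
        psi0 = variance * np.ones(mu.shape[0])
        # psi1_nm = <k(x_n, Z_m)>: an RBF whose lengthscale is inflated by S_n.
        denom = l2 + S[:, None]                      # (N, 1)
        dist2 = (mu[:, None] - Z[None, :]) ** 2      # (N, M)
        psi1 = variance * np.sqrt(l2 / denom) * np.exp(-0.5 * dist2 / denom)
        return psi0, psi1

    Z = np.linspace(-1, 1, 3)       # inducing inputs
    mu = np.array([0.0, 0.5])       # variational means
    S = np.array([0.1, 0.2])        # variational variances
    psi0, psi1 = rbf_psi01(1.0, 0.7, Z, mu, S)
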
@@ -150,9 +167,7 @@ class RBF(Kernpart):
         else:
             self.lengthscale.gradient += (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dK)

-    def dK_dX(self, dL_dK, X, X2, target):
+    def _gradients_X(self, dL_dK, X, X2, target):
         #if self._X is None or X.base is not self._X.base or X2 is not None:
         self._K_computations(X, X2)
         if X2 is None:

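Note: the pattern replacing dK_dtheta throughout is that each parameter receives sum_ij dL/dK_ij * dK_ij/dtheta on its own .gradient attribute; for the RBF, dK/dvariance = K/variance (the cached _K_dvar). Incidentally, update_gradients_full(self, dL_dK, X) in the earlier hunk still calls self._K_computations(X, X2) with X2 not in its signature, which looks like a leftover of this refactor. A finite-difference check of the variance gradient, using plain numpy in place of the cached computations:

    import numpy as np

    def rbf_K(X, variance, lengthscale):
        r2 = np.sum((X[:, None, :] - X[None, :, :]) ** 2, axis=-1)
        return variance * np.exp(-0.5 * r2 / lengthscale ** 2)

    rng = np.random.default_rng(1)
    X = rng.standard_normal((4, 2))
    dL_dK = rng.standard_normal((4, 4))
    var, ls = 1.3, 0.8

    # Chain rule: dL/dvariance = sum(dL_dK * K / variance),
    # matching self.variance.gradient = np.sum(self._K_dvar * dL_dK).
    analytic = np.sum(dL_dK * rbf_K(X, var, ls) / var)

    eps = 1e-6
    numeric = np.sum(dL_dK * (rbf_K(X, var + eps, ls)
                              - rbf_K(X, var - eps, ls))) / (2 * eps)
    assert np.allclose(analytic, numeric, atol=1e-5)
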
@@ -165,22 +180,13 @@ class RBF(Kernpart):
     def dKdiag_dX(self, dL_dKdiag, X, target):
         pass


     #---------------------------------------#
     #             PSI statistics            #
     #---------------------------------------#

-    def psi0(self, Z, mu, S, target):
-        target += self.variance
-
     def dpsi0_dmuS(self, dL_dpsi0, Z, mu, S, target_mu, target_S):
         pass

-    def psi1(self, Z, mu, S, target):
-        self._psi_computations(Z, mu, S)
-        target += self._psi1
-
     def dpsi1_dZ(self, dL_dpsi1, Z, mu, S, target):
         self._psi_computations(Z, mu, S)
         denominator = (self.lengthscale2 * (self._psi1_denom))

@@ -193,10 +199,6 @@ class RBF(Kernpart):
         target_mu += np.sum(dL_dpsi1[:, :, None] * tmp * self._psi1_dist, 1)
         target_S += np.sum(dL_dpsi1[:, :, None] * 0.5 * tmp * (self._psi1_dist_sq - 1), 1)

-    def psi2(self, Z, mu, S, target):
-        self._psi_computations(Z, mu, S)
-        target += self._psi2
-
     def dpsi2_dZ(self, dL_dpsi2, Z, mu, S, target):
         self._psi_computations(Z, mu, S)
         term1 = self._psi2_Zdist / self.lengthscale2 # num_inducing, num_inducing, input_dim

@@ -283,6 +285,7 @@ class RBF(Kernpart):
         num_data, num_inducing, input_dim = X.shape[0], X2.shape[0], self.input_dim
         X, X2 = param_to_array(X, X2)
         weave.inline(code, arg_names=['num_data', 'num_inducing', 'input_dim', 'X', 'X2', 'target', 'dvardLdK', 'var_len3'], type_converters=weave.converters.blitz, **self.weave_options)
+        return target

@@ -19,18 +19,7 @@ class White(Kernpart):
         self.input_dim = input_dim
         self.variance = Param('variance', variance)
         self.add_parameters(self.variance)
-        # self._set_params(np.array([variance]).flatten())
         self._psi1 = 0 # TODO: more elegance here

-    # def _get_params(self):
-    #     return self.variance
-    #
-    # def _set_params(self,x):
-    #     assert x.shape==(1,)
-    #     self.variance = x
-    #
-    # def _get_param_names(self):
-    #     return ['variance']
-
     def K(self,X,X2,target):
         if X2 is None:

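Note: the deleted _get_params/_set_params/_get_param_names boilerplate is exactly what the commit's "new parameter gradient structure" replaces. Parameters are now Param objects registered through add_parameters, and each carries its own .gradient that update_gradients_full writes into. A toy stand-in for the idea (the real Param handles constraints, tying and naming; this is a sketch only):

    import numpy as np

    class Param(np.ndarray):
        # An array that knows its name and owns its gradient.
        def __new__(cls, name, value):
            obj = np.atleast_1d(np.asarray(value, dtype=float)).view(cls)
            obj.name = name
            obj.gradient = np.zeros_like(np.asarray(obj))
            return obj

    variance = Param('variance', 1.0)
    variance.gradient = 2.5   # what update_gradients_full would write
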
@@ -39,14 +28,19 @@ class White(Kernpart):
     def Kdiag(self,X,target):
         target += self.variance

-    def dK_dtheta(self,dL_dK,X,X2,target):
-        if X2 is None:
-            target += np.trace(dL_dK)
+    def update_gradients_full(self, dL_dK, X):
+        self.variance.gradient = np.trace(dL_dK)
+
+    def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
+        raise NotImplementedError
+
+    def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
+        raise NotImplementedError

     def dKdiag_dtheta(self,dL_dKdiag,X,target):
         target += np.sum(dL_dKdiag)

-    def dK_dX(self,dL_dK,X,X2,target):
+    def gradients_X(self,dL_dK,X,X2,target):
         pass

     def dKdiag_dX(self,dL_dKdiag,X,target):

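Note: for the white kernel K(X, X) = variance * I, the derivative of K with respect to the variance is the identity matrix, so the chain rule collapses to dL/dvariance = trace(dL_dK) — the one-liner update_gradients_full above. The old dK_dtheta accumulated the same number into a target array; the new code writes it onto the parameter. A quick numeric sanity check (independent of GPy):

    import numpy as np

    rng = np.random.default_rng(2)
    N = 5
    dL_dK = rng.standard_normal((N, N))
    K = lambda v: v * np.eye(N)          # white kernel on X == X2

    analytic = np.trace(dL_dK)           # dK/dvariance = I
    eps = 1e-6
    numeric = np.sum(dL_dK * (K(0.3 + eps) - K(0.3 - eps))) / (2 * eps)
    assert np.allclose(analytic, numeric)
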
@@ -52,12 +52,8 @@ class Gaussian(Likelihood):
     def covariance_matrix(self, Y, Y_metadata=None):
         return np.eye(Y.shape[0]) * self.variance

-    def _gradients(self, partial):
-        """
-        Return the derivative of the log marginal likelihood wrt self.variance,
-        given the appropriate partial derivative
-        """
-        return np.sum(partial)
+    def set_gradients(self, partial):
+        self.variance.gradient = np.sum(partial)

     def _preprocess_values(self, Y):
         """

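Note: the same pattern on the likelihood side. The Gaussian covariance is variance * I_N, so the gradient with respect to the noise variance is the sum of the diagonal partials — which is why inference() passes np.diag(dL_dK) and the likelihood simply sums it. One wrinkle visible in this commit: inference() calls likelihood.update_gradients(...) while this file defines set_gradients(...), so the two names presumably get reconciled in a later commit. The relationship itself, in plain numpy:

    import numpy as np

    rng = np.random.default_rng(3)
    dL_dK = rng.standard_normal((5, 5))

    partial = np.diag(dL_dK)        # what inference() hands the likelihood
    grad = np.sum(partial)          # what set_gradients stores on variance
    assert np.isclose(grad, np.trace(dL_dK))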