changes to rbf and white to allow new parameter gradient structure

James Hensman 2014-01-24 14:06:16 +00:00
parent ad5a967b39
commit 7b5e8a9ffc
6 changed files with 43 additions and 46 deletions


@@ -24,7 +24,7 @@ class ExactGaussianInference(object):
         """
         find a matrix L which satisfies LL^T = YY^T.
         Note that L may have fewer columns than Y, else L=Y.
         """
         N, D = Y.shape
         if (N>D):
@@ -33,22 +33,26 @@ class ExactGaussianInference(object):
         #if Y in self.cache, return self.Cache[Y], else store Y in cache and return L.
         raise NotImplementedError, 'TODO' #TODO

-    def inference(self, K, likelihood, Y, Y_metadata=None):
+    def inference(self, kern, X, likelihood, Y, Y_metadata=None):
         """
         Returns a Posterior class containing essential quantities of the posterior
         """
         YYT_factor = self.get_YYTfactor(Y)
+        K = kern.K(X)
         Wi, LW, LWi, W_logdet = pdinv(K + likelihood.covariance_matrix(Y, Y_metadata))
         alpha, _ = dpotrs(LW, YYT_factor, lower=1)
-        dL_dK = 0.5 * (tdot(alpha) - Y.shape[1] * Wi)
         log_marginal = 0.5*(-Y.size * log_2_pi - Y.shape[1] * W_logdet - np.sum(alpha * YYT_factor))
-        dL_dtheta_lik = likelihood._gradients(np.diag(dL_dK))
-        return Posterior(log_marginal, dL_dK, dL_dtheta_lik, LW, alpha, K)
+        dL_dK = 0.5 * (tdot(alpha) - Y.shape[1] * Wi)
+        kern.update_gradients_full(dL_dK)
+        likelihood.update_gradients(np.diag(dL_dK))
+        return Posterior(log_marginal, dL_dK, LW, alpha, K)
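
Note: the quantities in inference() are the standard Gaussian-process marginal likelihood identities. With W = K + (likelihood covariance) and alpha = W^{-1} * YYT_factor, dL_dK = 0.5*(alpha alpha^T - D * W^{-1}), and the diagonal handed to the likelihood sums to the noise-variance gradient. A minimal, self-contained numpy check of these identities on toy data (not the GPy API):

import numpy as np

# Toy check of the identities used in inference() above:
#   log p(Y|X) = 0.5*(-N*D*log(2*pi) - D*log|W| - tr(W^{-1} Y Y^T))
#   dL/dK      = 0.5*(alpha alpha^T - D * W^{-1}),  alpha = W^{-1} Y
# and d(log p)/d(noise) = tr(dL/dK), the sum of the np.diag(dL_dK)
# vector that gets passed on to the likelihood.
rng = np.random.RandomState(0)
X = rng.randn(20, 1)
Y = rng.randn(20, 2)
K = np.exp(-0.5 * (X - X.T) ** 2)   # a fixed toy kernel matrix
noise = 0.1

def log_marginal(noise):
    W = K + noise * np.eye(20)
    _, logdet = np.linalg.slogdet(W)
    return 0.5 * (-Y.size * np.log(2 * np.pi) - Y.shape[1] * logdet
                  - np.sum(Y * np.linalg.solve(W, Y)))

Wi = np.linalg.inv(K + noise * np.eye(20))
alpha = Wi.dot(Y)
dL_dK = 0.5 * (alpha.dot(alpha.T) - Y.shape[1] * Wi)

eps = 1e-6
numeric = (log_marginal(noise + eps) - log_marginal(noise - eps)) / (2 * eps)
print(numeric, np.sum(np.diag(dL_dK)))   # should agree to ~1e-6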


@@ -17,7 +17,7 @@ from ..util.linalg import mdot, jitchol, pddet, dpotrs
 from functools import partial as partial_func
 import warnings

-class Laplace(likelihood):
+class LaplaceInference(object):
     """Laplace approximation to a posterior"""

     def __init__(self, data, noise_model, extra_data=None):


@@ -14,7 +14,7 @@ class Posterior(object):
     schemes and the model classes.
     """
-    def __init__(self, log_marginal, dL_dK, dL_dtheta_lik, woodbury_chol=None, woodbury_vector=None, K=None, mean=None, cov=None, K_chol=None):
+    def __init__(self, log_marginal, dL_dK, woodbury_chol=None, woodbury_vector=None, K=None, mean=None, cov=None, K_chol=None):
         """
         log_marginal: log p(Y|X)
         dL_dK: d/dK log p(Y|X)
@@ -56,7 +56,7 @@ class Posterior(object):
         if ((woodbury_chol is not None) and (woodbury_vector is not None) and (K is not None)) or ((mean is not None) and (cov is not None) and (K is not None)):
             pass # we have sufficient to compute the posterior
         else:
-            raise ValueError, "insufficient onformation to compute the posterior"
+            raise ValueError, "insufficient information to compute the posterior"
         #option 1:
         self._woodbury_chol = woodbury_chol


@@ -80,6 +80,9 @@ class RBF(Kernpart):
         self._X, self._X2 = np.empty(shape=(2, 1))
         self._Z, self._mu, self._S = np.empty(shape=(3, 1)) # cached versions of Z,mu,S

     def K(self, X, X2, target):
         self._K_computations(X, X2)
         target += self.variance * self._K_dvar
@@ -87,6 +90,20 @@ class RBF(Kernpart):
     def Kdiag(self, X, target):
         np.add(target, self.variance, target)

+    def psi0(self, Z, mu, S, target):
+        target += self.variance
+
+    def psi1(self, Z, mu, S, target):
+        self._psi_computations(Z, mu, S)
+        target += self._psi1
+
+    def psi2(self, Z, mu, S, target):
+        self._psi_computations(Z, mu, S)
+        target += self._psi2
+
     def update_gradients_full(self, dL_dK, X):
         self._K_computations(X, X2)
         self.variance.gradient = np.sum(self._K_dvar * dL_dK)
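
Note: this hunk is the heart of the commit message's "new parameter gradient structure": instead of every kernel packing derivatives into a flat target array (the old dK_dtheta style, still visible in the white-kernel diff below), each Param object carries its own .gradient attribute, written in place by update_gradients_full. A rough self-contained sketch of the pattern; the names are illustrative, not GPy internals:

import numpy as np

class Param(object):
    # Stand-in for GPy's Param: a named value with a .gradient slot.
    def __init__(self, name, value):
        self.name, self.value, self.gradient = name, float(value), None

class ToyRBF(object):
    def __init__(self, variance, lengthscale):
        self.variance = Param('variance', variance)
        self.lengthscale = Param('lengthscale', lengthscale)

    def K(self, X):
        r2 = ((X[:, None, :] - X[None, :, :]) ** 2).sum(-1)
        return self.variance.value * np.exp(-0.5 * r2 / self.lengthscale.value ** 2)

    def update_gradients_full(self, dL_dK, X):
        # chain rule, dL/dtheta = sum_ij dL_dK[i,j] * dK[i,j]/dtheta,
        # written straight onto each Param instead of into a shared array
        r2 = ((X[:, None, :] - X[None, :, :]) ** 2).sum(-1)
        K = self.K(X)
        self.variance.gradient = np.sum(dL_dK * K) / self.variance.value
        self.lengthscale.gradient = np.sum(dL_dK * K * r2) / self.lengthscale.value ** 3

X = np.random.randn(5, 2)
k = ToyRBF(1.0, 0.5)
k.update_gradients_full(np.ones((5, 5)), X)
print(k.variance.gradient, k.lengthscale.gradient)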
@@ -150,9 +167,7 @@ class RBF(Kernpart):
         else:
             self.lengthscale.gradient += (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dK)

-    def dK_dX(self, dL_dK, X, X2, target):
+    def _gradients_X(self, dL_dK, X, X2, target):
         #if self._X is None or X.base is not self._X.base or X2 is not None:
         self._K_computations(X, X2)
         if X2 is None:
@@ -165,22 +180,13 @@ class RBF(Kernpart):
     def dKdiag_dX(self, dL_dKdiag, X, target):
         pass

     #---------------------------------------#
     #             PSI statistics            #
     #---------------------------------------#

-    def psi0(self, Z, mu, S, target):
-        target += self.variance
-
     def dpsi0_dmuS(self, dL_dpsi0, Z, mu, S, target_mu, target_S):
         pass

-    def psi1(self, Z, mu, S, target):
-        self._psi_computations(Z, mu, S)
-        target += self._psi1
-
     def dpsi1_dZ(self, dL_dpsi1, Z, mu, S, target):
         self._psi_computations(Z, mu, S)
         denominator = (self.lengthscale2 * (self._psi1_denom))
@@ -193,10 +199,6 @@ class RBF(Kernpart):
         target_mu += np.sum(dL_dpsi1[:, :, None] * tmp * self._psi1_dist, 1)
         target_S += np.sum(dL_dpsi1[:, :, None] * 0.5 * tmp * (self._psi1_dist_sq - 1), 1)

-    def psi2(self, Z, mu, S, target):
-        self._psi_computations(Z, mu, S)
-        target += self._psi2
-
     def dpsi2_dZ(self, dL_dpsi2, Z, mu, S, target):
         self._psi_computations(Z, mu, S)
         term1 = self._psi2_Zdist / self.lengthscale2 # num_inducing, num_inducing, input_dim
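
Note: the psi statistics being moved up in this file are expectations of the kernel under a Gaussian variational density q(x_n) = N(mu_n, diag(S_n)), used by the variational sparse-GP path; psi0 is just the variance per data point, and psi1 has the usual closed form for the RBF kernel (as in Titsias & Lawrence, 2010). A hedged sketch of what the psi1 computation amounts to, using my own variable names rather than the cached _psi_computations internals:

import numpy as np

# psi1[n, m] = E_{q(x_n)}[k(x_n, Z[m])] for an RBF kernel.
# mu, S: (N, D) means/variances of q; Z: (M, D) inducing inputs;
# lengthscale2: squared lengthscale(s), scalar or shape (D,).
def rbf_psi1(variance, lengthscale2, Z, mu, S):
    denom = S[:, None, :] + lengthscale2                   # (N, 1, D)
    dist2 = (mu[:, None, :] - Z[None, :, :]) ** 2          # (N, M, D)
    scale = np.prod(np.sqrt(lengthscale2 / denom), axis=-1)
    return variance * scale * np.exp(-0.5 * np.sum(dist2 / denom, axis=-1))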
@@ -283,6 +285,7 @@ class RBF(Kernpart):
         num_data, num_inducing, input_dim = X.shape[0], X2.shape[0], self.input_dim
         X, X2 = param_to_array(X, X2)
         weave.inline(code, arg_names=['num_data', 'num_inducing', 'input_dim', 'X', 'X2', 'target', 'dvardLdK', 'var_len3'], type_converters=weave.converters.blitz, **self.weave_options)
+        return target


@@ -19,18 +19,7 @@ class White(Kernpart):
         self.input_dim = input_dim
         self.variance = Param('variance', variance)
         self.add_parameters(self.variance)
-        # self._set_params(np.array([variance]).flatten())
         self._psi1 = 0 # TODO: more elegance here

-    # def _get_params(self):
-    #     return self.variance
-    #
-    # def _set_params(self,x):
-    #     assert x.shape==(1,)
-    #     self.variance = x
-    #
-    # def _get_param_names(self):
-    #     return ['variance']
-
     def K(self,X,X2,target):
         if X2 is None:
@@ -39,14 +28,19 @@ class White(Kernpart):
     def Kdiag(self,X,target):
         target += self.variance

-    def dK_dtheta(self,dL_dK,X,X2,target):
-        if X2 is None:
-            target += np.trace(dL_dK)
+    def update_gradients_full(self, dL_dK, X):
+        self.variance.gradient = np.trace(dL_dK)
+
+    def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
+        raise NotImplementedError
+
+    def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
+        raise NotImplementedError

     def dKdiag_dtheta(self,dL_dKdiag,X,target):
         target += np.sum(dL_dKdiag)

-    def dK_dX(self,dL_dK,X,X2,target):
+    def gradients_X(self,dL_dK,X,X2,target):
         pass

     def dKdiag_dX(self,dL_dKdiag,X,target):
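
Note: the np.trace in the new update_gradients_full is the whole chain rule for white noise: K = variance * I, so dK/dvariance is the identity and sum(dL_dK * I) collapses to the trace (the old dK_dtheta accumulated the same quantity, but only when X2 was None, since the white kernel vanishes off the diagonal block). A one-off numeric confirmation, outside any GPy class:

import numpy as np

# L(v) = <dL_dK, v * I>; its derivative in v is trace(dL_dK)
dL_dK = np.random.randn(4, 4)
L = lambda v: np.sum(dL_dK * (v * np.eye(4)))
eps = 1e-6
print((L(0.3 + eps) - L(0.3 - eps)) / (2 * eps), np.trace(dL_dK))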


@@ -52,12 +52,8 @@ class Gaussian(Likelihood):
     def covariance_matrix(self, Y, Y_metadata=None):
         return np.eye(Y.shape[0]) * self.variance

-    def _gradients(self, partial):
-        """
-        Return the derivative of the log marginal likelihood wrt self.variance,
-        given the appropriate partial derivative
-        """
-        return np.sum(partial)
+    def set_gradients(self, partial):
+        self.variance.gradient = np.sum(partial)

     def _preprocess_values(self, Y):
         """