diff --git a/GPy/inference/latent_function_inference/exact_gaussian_inference.py b/GPy/inference/latent_function_inference/exact_gaussian_inference.py
index d6f6c406..d9d344ab 100644
--- a/GPy/inference/latent_function_inference/exact_gaussian_inference.py
+++ b/GPy/inference/latent_function_inference/exact_gaussian_inference.py
@@ -24,7 +24,7 @@ class ExactGaussianInference(object):
         """
         find a matrix L which satisfies LL^T = YY^T.
-        Note that L may have fewer columns than Y, else L=Y.
+        Note that L may have fewer columns than Y; otherwise L=Y.
         """
         N, D = Y.shape
         if (N>D):
@@ -33,22 +33,27 @@ class ExactGaussianInference(object):
         #if Y in self.cache, return self.Cache[Y], else store Y in cache and return L.
         raise NotImplementedError, 'TODO' #TODO

-    def inference(self, K, likelihood, Y, Y_metadata=None):
+    def inference(self, kern, X, likelihood, Y, Y_metadata=None):
         """
         Returns a Posterior class containing essential quantities of the posterior
         """
         YYT_factor = self.get_YYTfactor(Y)

+        K = kern.K(X)
+
         Wi, LW, LWi, W_logdet = pdinv(K + likelihood.covariance_matrix(Y, Y_metadata))

         alpha, _ = dpotrs(LW, YYT_factor, lower=1)

         dL_dK = 0.5 * (tdot(alpha) - Y.shape[1] * Wi)
         log_marginal = 0.5*(-Y.size * log_2_pi - Y.shape[1] * W_logdet - np.sum(alpha * YYT_factor))
-        dL_dtheta_lik = likelihood._gradients(np.diag(dL_dK))

-        return Posterior(log_marginal, dL_dK, dL_dtheta_lik, LW, alpha, K)
+        kern.update_gradients_full(dL_dK, X)
+
+        likelihood.update_gradients(np.diag(dL_dK))
+
+        return Posterior(log_marginal, dL_dK, LW, alpha, K)
diff --git a/GPy/inference/latent_function_inference/laplace.py b/GPy/inference/latent_function_inference/laplace.py
index 0def0c8b..2b2128db 100644
--- a/GPy/inference/latent_function_inference/laplace.py
+++ b/GPy/inference/latent_function_inference/laplace.py
@@ -17,7 +17,7 @@ from ..util.linalg import mdot, jitchol, pddet, dpotrs
 from functools import partial as partial_func
 import warnings

-class Laplace(likelihood):
+class LaplaceInference(object):
     """Laplace approximation to a posterior"""

     def __init__(self, data, noise_model, extra_data=None):
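For reference, the linear algebra that the new `inference(kern, X, likelihood, Y)` entry point performs can be sketched standalone. A minimal illustration with toy data, using `scipy.linalg.cho_factor`/`cho_solve` in place of GPy's `pdinv`/`dpotrs` helpers; only the algebra mirrors the diff, none of this code is part of it:

```python
import numpy as np
from scipy.linalg import cho_factor, cho_solve

# Toy inputs standing in for kern.K(X) and the Gaussian noise covariance.
N, D = 20, 1
X = np.linspace(0, 1, N)[:, None]
Y = np.sin(6 * X) + 0.1 * np.random.randn(N, D)
K = np.exp(-0.5 * (X - X.T) ** 2)          # RBF kernel matrix, kern.K(X)
W = K + 0.1 * np.eye(N)                    # K + likelihood.covariance_matrix(...)

LW, low = cho_factor(W, lower=True)        # Cholesky factor, the pdinv step
alpha = cho_solve((LW, low), Y)            # W^{-1} Y, the dpotrs step
Wi = cho_solve((LW, low), np.eye(N))       # W^{-1}
W_logdet = 2.0 * np.sum(np.log(np.diag(LW)))

# The quantities handed to Posterior and to the gradient updates:
log_marginal = 0.5 * (-Y.size * np.log(2 * np.pi)
                      - D * W_logdet - np.sum(alpha * Y))
dL_dK = 0.5 * (alpha @ alpha.T - D * Wi)   # passed to kern.update_gradients_full
```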
""" - def __init__(self, log_marginal, dL_dK, dL_dtheta_lik, woodbury_chol=None, woodbury_vector=None, K=None, mean=None, cov=None, K_chol=None): + def __init__(self, log_marginal, dL_dK, woodbury_chol=None, woodbury_vector=None, K=None, mean=None, cov=None, K_chol=None): """ log_marginal: log p(Y|X) dL_dK: d/dK log p(Y|X) @@ -56,7 +56,7 @@ class Posterior(object): if ((woodbury_chol is not None) and (woodbury_vector is not None) and (K is not None)) or ((mean is not None) and (cov is not None) and (K is not None)): pass # we have sufficient to compute the posterior else: - raise ValueError, "insufficient onformation to compute the posterior" + raise ValueError, "insufficient information to compute the posterior" #option 1: self._woodbury_chol = woodbury_chol diff --git a/GPy/kern/parts/rbf.py b/GPy/kern/parts/rbf.py index 757ebd09..57ce2996 100644 --- a/GPy/kern/parts/rbf.py +++ b/GPy/kern/parts/rbf.py @@ -80,6 +80,9 @@ class RBF(Kernpart): self._X, self._X2 = np.empty(shape=(2, 1)) self._Z, self._mu, self._S = np.empty(shape=(3, 1)) # cached versions of Z,mu,S + + + def K(self, X, X2, target): self._K_computations(X, X2) target += self.variance * self._K_dvar @@ -87,6 +90,20 @@ class RBF(Kernpart): def Kdiag(self, X, target): np.add(target, self.variance, target) + def psi0(self, Z, mu, S, target): + target += self.variance + + def psi1(self, Z, mu, S, target): + self._psi_computations(Z, mu, S) + target += self._psi1 + + def psi2(self, Z, mu, S, target): + self._psi_computations(Z, mu, S) + target += self._psi2 + + + + def update_gradients_full(self, dL_dK, X): self._K_computations(X, X2) self.variance.gradient = np.sum(self._K_dvar * dL_dK) @@ -150,9 +167,7 @@ class RBF(Kernpart): else: self.lengthscale.gradient += (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dK) - - - def dK_dX(self, dL_dK, X, X2, target): + def _gradients_X(self, dL_dK, X, X2, target): #if self._X is None or X.base is not self._X.base or X2 is not None: self._K_computations(X, X2) if X2 is None: @@ -165,22 +180,13 @@ class RBF(Kernpart): def dKdiag_dX(self, dL_dKdiag, X, target): pass - #---------------------------------------# # PSI statistics # #---------------------------------------# - def psi0(self, Z, mu, S, target): - target += self.variance - - def dpsi0_dmuS(self, dL_dpsi0, Z, mu, S, target_mu, target_S): pass - def psi1(self, Z, mu, S, target): - self._psi_computations(Z, mu, S) - target += self._psi1 - def dpsi1_dZ(self, dL_dpsi1, Z, mu, S, target): self._psi_computations(Z, mu, S) denominator = (self.lengthscale2 * (self._psi1_denom)) @@ -193,10 +199,6 @@ class RBF(Kernpart): target_mu += np.sum(dL_dpsi1[:, :, None] * tmp * self._psi1_dist, 1) target_S += np.sum(dL_dpsi1[:, :, None] * 0.5 * tmp * (self._psi1_dist_sq - 1), 1) - def psi2(self, Z, mu, S, target): - self._psi_computations(Z, mu, S) - target += self._psi2 - def dpsi2_dZ(self, dL_dpsi2, Z, mu, S, target): self._psi_computations(Z, mu, S) term1 = self._psi2_Zdist / self.lengthscale2 # num_inducing, num_inducing, input_dim @@ -283,6 +285,7 @@ class RBF(Kernpart): num_data, num_inducing, input_dim = X.shape[0], X2.shape[0], self.input_dim X, X2 = param_to_array(X, X2) weave.inline(code, arg_names=['num_data', 'num_inducing', 'input_dim', 'X', 'X2', 'target', 'dvardLdK', 'var_len3'], type_converters=weave.converters.blitz, **self.weave_options) + return target diff --git a/GPy/kern/parts/white.py b/GPy/kern/parts/white.py index 671e85f5..a59c9f98 100644 --- a/GPy/kern/parts/white.py +++ b/GPy/kern/parts/white.py @@ 
diff --git a/GPy/kern/parts/white.py b/GPy/kern/parts/white.py
index 671e85f5..a59c9f98 100644
--- a/GPy/kern/parts/white.py
+++ b/GPy/kern/parts/white.py
@@ -19,18 +19,7 @@ class White(Kernpart):
         self.input_dim = input_dim
         self.variance = Param('variance', variance)
         self.add_parameters(self.variance)
-#        self._set_params(np.array([variance]).flatten())
         self._psi1 = 0 # TODO: more elegance here
-
-#    def _get_params(self):
-#        return self.variance
-#
-#    def _set_params(self,x):
-#        assert x.shape==(1,)
-#        self.variance = x
-#
-#    def _get_param_names(self):
-#        return ['variance']

     def K(self,X,X2,target):
         if X2 is None:
@@ -39,14 +28,19 @@ class White(Kernpart):
     def Kdiag(self,X,target):
         target += self.variance

-    def dK_dtheta(self,dL_dK,X,X2,target):
-        if X2 is None:
-            target += np.trace(dL_dK)
+    def update_gradients_full(self, dL_dK, X):
+        self.variance.gradient = np.trace(dL_dK)
+
+    def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
+        raise NotImplementedError
+
+    def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
+        raise NotImplementedError

     def dKdiag_dtheta(self,dL_dKdiag,X,target):
         target += np.sum(dL_dKdiag)

-    def dK_dX(self,dL_dK,X,X2,target):
+    def gradients_X(self,dL_dK,X,X2,target):
         pass

     def dKdiag_dX(self,dL_dKdiag,X,target):
diff --git a/GPy/likelihoods/gaussian.py b/GPy/likelihoods/gaussian.py
index 8f2d2225..b32d353a 100644
--- a/GPy/likelihoods/gaussian.py
+++ b/GPy/likelihoods/gaussian.py
@@ -52,12 +52,8 @@ class Gaussian(Likelihood):
     def covariance_matrix(self, Y, Y_metadata=None):
         return np.eye(Y.shape[0]) * self.variance

-    def _gradients(self, partial):
-        """
-        Return the derivative of the log marginal likelihood wrt self.variance,
-        given the appropriate partial derivative
-        """
-        return np.sum(partial)
+    def update_gradients(self, partial):
+        self.variance.gradient = np.sum(partial)

     def _preprocess_values(self, Y):
         """
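The renamed `update_gradients` closes the loop opened in `exact_gaussian_inference.py`: with a Gaussian likelihood the noise covariance is `variance * np.eye(N)`, so the variance gradient of the log marginal is `np.sum(np.diag(dL_dK))`, which is exactly what the likelihood now receives. A finite-difference sanity check of that identity on toy data; illustrative only, not part of the diff:

```python
import numpy as np

rng = np.random.default_rng(1)
N = 15
X = np.linspace(0, 1, N)[:, None]
Y = np.sin(6 * X) + 0.1 * rng.standard_normal((N, 1))
K = np.exp(-0.5 * (X - X.T) ** 2)              # fixed toy kernel matrix

def log_marginal(noise_var):
    # log p(Y|X) for a GP with fixed K and i.i.d. Gaussian noise.
    W = K + noise_var * np.eye(N)
    _, logdet = np.linalg.slogdet(W)
    return 0.5 * (-Y.size * np.log(2 * np.pi) - logdet
                  - (Y.T @ np.linalg.solve(W, Y))).item()

noise_var, eps = 0.1, 1e-6
Wi = np.linalg.inv(K + noise_var * np.eye(N))
alpha = Wi @ Y
dL_dK = 0.5 * (alpha @ alpha.T - Wi)           # as computed in inference()
analytic = np.sum(np.diag(dL_dK))              # what update_gradients sets
numeric = (log_marginal(noise_var + eps)
           - log_marginal(noise_var - eps)) / (2 * eps)
print(analytic, numeric)                       # should match closely
```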