From 7812a7dc7d8cbf622141d9a9469a4e86e807fb29 Mon Sep 17 00:00:00 2001
From: James Hensman
Date: Thu, 5 Dec 2013 10:56:18 +0000
Subject: [PATCH] moved functional part of sparseGP to inference/dtcvar

---
 GPy/core/sparse_gp.py                        | 116 --------------
 .../latent_function_inference/dtcvar.py      | 149 ++++++++++++++++++
 2 files changed, 149 insertions(+), 116 deletions(-)
 create mode 100644 GPy/inference/latent_function_inference/dtcvar.py

diff --git a/GPy/core/sparse_gp.py b/GPy/core/sparse_gp.py
index 2ac47419..6445988a 100644
--- a/GPy/core/sparse_gp.py
+++ b/GPy/core/sparse_gp.py
@@ -87,122 +87,6 @@ class SparseGP(GPBase):
         self.psi0 = self.kern.Kdiag(self.X)
         self.psi1 = self.kern.K(self.X, self.Z)
         self.psi2 = None
-
-    def _computations(self):
-        if self._const_jitter is None or not(self._const_jitter.shape[0] == self.num_inducing):
-            self._const_jitter = np.eye(self.num_inducing) * 1e-8
-
-        # factor Kmm
-        self._Lm = jitchol(self.Kmm + self._const_jitter)
-
-        # The rather complex computations of self._A
-        if self.has_uncertain_inputs:
-            if self.likelihood.is_heteroscedastic:
-                psi2_beta = (self.psi2 * (self.likelihood.precision.flatten().reshape(self.num_data, 1, 1))).sum(0)
-            else:
-                psi2_beta = self.psi2.sum(0) * self.likelihood.precision
-            evals, evecs = linalg.eigh(psi2_beta)
-            clipped_evals = np.clip(evals, 0., 1e6) # TODO: make clipping configurable
-            if not np.array_equal(evals, clipped_evals):
-                pass # print evals
-            tmp = evecs * np.sqrt(clipped_evals)
-            tmp = tmp.T
-        else:
-            if self.likelihood.is_heteroscedastic:
-                tmp = self.psi1 * (np.sqrt(self.likelihood.precision.flatten().reshape(self.num_data, 1)))
-            else:
-                tmp = self.psi1 * (np.sqrt(self.likelihood.precision))
-        tmp, _ = dtrtrs(self._Lm, np.asfortranarray(tmp.T), lower=1)
-        self._A = tdot(tmp)
-
-        # factor B
-        self.B = np.eye(self.num_inducing) + self._A
-        self.LB = jitchol(self.B)
-
-        # VVT_factor is a matrix such that tdot(VVT_factor) = VVT...this is for efficiency!
-        self.psi1Vf = np.dot(self.psi1.T, self.likelihood.VVT_factor)
-
-        # back substutue C into psi1Vf
-        tmp, info1 = dtrtrs(self._Lm, np.asfortranarray(self.psi1Vf), lower=1, trans=0)
-        self._LBi_Lmi_psi1Vf, _ = dtrtrs(self.LB, np.asfortranarray(tmp), lower=1, trans=0)
-        # tmp, info2 = dpotrs(self.LB, tmp, lower=1)
-        tmp, info2 = dtrtrs(self.LB, self._LBi_Lmi_psi1Vf, lower=1, trans=1)
-        self.Cpsi1Vf, info3 = dtrtrs(self._Lm, tmp, lower=1, trans=1)
-
-        # Compute dL_dKmm
-        tmp = tdot(self._LBi_Lmi_psi1Vf)
-        self.data_fit = np.trace(tmp)
-        self.DBi_plus_BiPBi = backsub_both_sides(self.LB, self.output_dim * np.eye(self.num_inducing) + tmp)
-        tmp = -0.5 * self.DBi_plus_BiPBi
-        tmp += -0.5 * self.B * self.output_dim
-        tmp += self.output_dim * np.eye(self.num_inducing)
-        self.dL_dKmm = backsub_both_sides(self._Lm, tmp)
-
-        # Compute dL_dpsi # FIXME: this is untested for the heterscedastic + uncertain inputs case
-        self.dL_dpsi0 = -0.5 * self.output_dim * (self.likelihood.precision * np.ones([self.num_data, 1])).flatten()
-        self.dL_dpsi1 = np.dot(self.likelihood.VVT_factor, self.Cpsi1Vf.T)
-        dL_dpsi2_beta = 0.5 * backsub_both_sides(self._Lm, self.output_dim * np.eye(self.num_inducing) - self.DBi_plus_BiPBi)
-
-        if self.likelihood.is_heteroscedastic:
-
-            if self.has_uncertain_inputs:
-                self.dL_dpsi2 = self.likelihood.precision.flatten()[:, None, None] * dL_dpsi2_beta[None, :, :]
-            else:
-                self.dL_dpsi1 += 2.*np.dot(dL_dpsi2_beta, (self.psi1 * self.likelihood.precision.reshape(self.num_data, 1)).T).T
-                self.dL_dpsi2 = None
-        else:
-            dL_dpsi2 = self.likelihood.precision * dL_dpsi2_beta
-            if self.has_uncertain_inputs:
-                # repeat for each of the N psi_2 matrices
-                self.dL_dpsi2 = np.repeat(dL_dpsi2[None, :, :], self.num_data, axis=0)
-            else:
-                # subsume back into psi1 (==Kmn)
-                self.dL_dpsi1 += 2.*np.dot(self.psi1, dL_dpsi2)
-                self.dL_dpsi2 = None
-
-
-        # the partial derivative vector for the likelihood
-        if self.likelihood.size == 0:
-            # save computation here.
-            self.partial_for_likelihood = None
-        elif self.likelihood.is_heteroscedastic:
-
-            if self.has_uncertain_inputs:
-                raise NotImplementedError, "heteroscedatic derivates with uncertain inputs not implemented"
-
-            else:
-
-                LBi = chol_inv(self.LB)
-                Lmi_psi1, nil = dtrtrs(self._Lm, np.asfortranarray(self.psi1.T), lower=1, trans=0)
-                _LBi_Lmi_psi1, _ = dtrtrs(self.LB, np.asfortranarray(Lmi_psi1), lower=1, trans=0)
-
-
-                self.partial_for_likelihood = -0.5 * self.likelihood.precision + 0.5 * self.likelihood.V**2
-                self.partial_for_likelihood += 0.5 * self.output_dim * (self.psi0 - np.sum(Lmi_psi1**2,0))[:,None] * self.likelihood.precision**2
-
-                self.partial_for_likelihood += 0.5*np.sum(mdot(LBi.T,LBi,Lmi_psi1)*Lmi_psi1,0)[:,None]*self.likelihood.precision**2
-
-                self.partial_for_likelihood += -np.dot(self._LBi_Lmi_psi1Vf.T,_LBi_Lmi_psi1).T * self.likelihood.Y * self.likelihood.precision**2
-                self.partial_for_likelihood += 0.5*np.dot(self._LBi_Lmi_psi1Vf.T,_LBi_Lmi_psi1).T**2 * self.likelihood.precision**2
-
-        else:
-            # likelihood is not heteroscedatic
-            self.partial_for_likelihood = -0.5 * self.num_data * self.output_dim * self.likelihood.precision + 0.5 * self.likelihood.trYYT * self.likelihood.precision ** 2
-            self.partial_for_likelihood += 0.5 * self.output_dim * (self.psi0.sum() * self.likelihood.precision ** 2 - np.trace(self._A) * self.likelihood.precision)
-            self.partial_for_likelihood += self.likelihood.precision * (0.5 * np.sum(self._A * self.DBi_plus_BiPBi) - self.data_fit)
-
-    def log_likelihood(self):
-        """ Compute the (lower bound on the) log marginal likelihood """
-        if self.likelihood.is_heteroscedastic:
-            A = -0.5 * self.num_data * self.output_dim * np.log(2.*np.pi) + 0.5 * np.sum(np.log(self.likelihood.precision)) - 0.5 * np.sum(self.likelihood.V * self.likelihood.Y)
-            B = -0.5 * self.output_dim * (np.sum(self.likelihood.precision.flatten() * self.psi0) - np.trace(self._A))
-        else:
-            A = -0.5 * self.num_data * self.output_dim * (np.log(2.*np.pi) - np.log(self.likelihood.precision)) - 0.5 * self.likelihood.precision * self.likelihood.trYYT
-            B = -0.5 * self.output_dim * (np.sum(self.likelihood.precision * self.psi0) - np.trace(self._A))
-        C = -self.output_dim * (np.sum(np.log(np.diag(self.LB)))) # + 0.5 * self.num_inducing * np.log(sf2))
-        D = 0.5 * self.data_fit
-        return A + B + C + D + self.likelihood.Z
-
     def parameters_changed(self):
         self._compute_kernel_matrices()
         self._computations()
diff --git a/GPy/inference/latent_function_inference/dtcvar.py b/GPy/inference/latent_function_inference/dtcvar.py
new file mode 100644
index 00000000..7a956a9a
--- /dev/null
+++ b/GPy/inference/latent_function_inference/dtcvar.py
@@ -0,0 +1,149 @@
+# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+
+import numpy as np
+from scipy import linalg
+
+from posterior import Posterior
+from ...util.linalg import pdinv, dpotrs, dtrtrs, tdot, jitchol, backsub_both_sides, mdot, chol_inv
+log_2_pi = np.log(2*np.pi)
+
+class DTCVar(object):
+    """
+    An object for inference when the likelihood is Gaussian, but we want to do sparse inference.
+
+    The function self.inference returns a Posterior object, which summarizes
+    the posterior.
+
+    For efficiency, we sometimes work with the Cholesky factor of Y*Y.T. To save
+    repeatedly recomputing this, we cache it.
+
+    """
+    def __init__(self):
+        self._YYTfactor_cache = caching.cache() # TODO: the caching helper is not implemented/imported yet
+        self.const_jitter = 1e-6
+
+    def get_YYTfactor(self, Y):
+        """
+        Find a matrix L which satisfies L*L.T = Y*Y.T.
+
+        Note that L may have fewer columns than Y.
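+
+        (If N > D, Y itself is such a factor, since tdot(Y) = Y*Y.T; otherwise an
+        N-column factor, e.g. the Cholesky factor of Y*Y.T, can be used instead.)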
+        """
+        N, D = Y.shape
+        if N > D:
+            return Y
+        else:
+            # if Y is in self._YYTfactor_cache, return the stored factor;
+            # otherwise store it and return L.
+            raise NotImplementedError, 'TODO' # TODO
+
+    def inference(self, Kmm, Kmn, Knn_diag, likelihood, Y):
+
+        num_inducing, num_data = Kmn.shape
+        output_dim = Y.shape[1]
+        const_jitter = np.eye(num_inducing) * self.const_jitter
+
+        # NB: for now this method assumes certain (deterministic) inputs, for which
+        # the psi statistics reduce to psi0 = diag(Knn), psi1 = Knm, and psi2 is unused.
+        # TODO: accept psi0, psi1, psi2 as arguments to handle uncertain inputs.
+        has_uncertain_inputs = False
+        psi0, psi1, psi2 = Knn_diag, Kmn.T, None
+
+        # factor Kmm # TODO: cache?
+        _Lm = jitchol(Kmm + const_jitter)
+
+        # The rather complex computations of A
+        if has_uncertain_inputs:
+            if likelihood.is_heteroscedastic:
+                psi2_beta = (psi2 * (likelihood.precision.flatten().reshape(num_data, 1, 1))).sum(0)
+            else:
+                psi2_beta = psi2.sum(0) * likelihood.precision
+            evals, evecs = linalg.eigh(psi2_beta)
+            clipped_evals = np.clip(evals, 0., 1e6) # TODO: make clipping configurable
+            if not np.array_equal(evals, clipped_evals):
+                pass # print evals
+            tmp = evecs * np.sqrt(clipped_evals)
+            tmp = tmp.T
+        else:
+            if likelihood.is_heteroscedastic:
+                tmp = psi1 * (np.sqrt(likelihood.precision.flatten().reshape(num_data, 1)))
+            else:
+                tmp = psi1 * (np.sqrt(likelihood.precision))
+        tmp, _ = dtrtrs(_Lm, np.asfortranarray(tmp.T), lower=1)
+        A = tdot(tmp)
+
+        # factor B
+        B = np.eye(num_inducing) + A
+        LB = jitchol(B)
+
+        # VVT_factor is a matrix such that tdot(VVT_factor) = VVT...this is for efficiency!
+        psi1Vf = np.dot(psi1.T, likelihood.VVT_factor)
+
+        # back substitute C into psi1Vf
+        tmp, info1 = dtrtrs(_Lm, np.asfortranarray(psi1Vf), lower=1, trans=0)
+        _LBi_Lmi_psi1Vf, _ = dtrtrs(LB, np.asfortranarray(tmp), lower=1, trans=0)
+        # tmp, info2 = dpotrs(LB, tmp, lower=1)
+        tmp, info2 = dtrtrs(LB, _LBi_Lmi_psi1Vf, lower=1, trans=1)
+        Cpsi1Vf, info3 = dtrtrs(_Lm, tmp, lower=1, trans=1)
+
+        # Compute dL_dKmm
+        tmp = tdot(_LBi_Lmi_psi1Vf)
+        data_fit = np.trace(tmp)
+        DBi_plus_BiPBi = backsub_both_sides(LB, output_dim * np.eye(num_inducing) + tmp)
+        tmp = -0.5 * DBi_plus_BiPBi
+        tmp += -0.5 * B * output_dim
+        tmp += output_dim * np.eye(num_inducing)
+        dL_dKmm = backsub_both_sides(_Lm, tmp)
+
+        # Compute dL_dpsi # FIXME: this is untested for the heteroscedastic + uncertain inputs case
+        dL_dpsi0 = -0.5 * output_dim * (likelihood.precision * np.ones([num_data, 1])).flatten()
+        dL_dpsi1 = np.dot(likelihood.VVT_factor, Cpsi1Vf.T)
+        dL_dpsi2_beta = 0.5 * backsub_both_sides(_Lm, output_dim * np.eye(num_inducing) - DBi_plus_BiPBi)
+
+        if likelihood.is_heteroscedastic:
+            if has_uncertain_inputs:
+                dL_dpsi2 = likelihood.precision.flatten()[:, None, None] * dL_dpsi2_beta[None, :, :]
+            else:
+                dL_dpsi1 += 2.*np.dot(dL_dpsi2_beta, (psi1 * likelihood.precision.reshape(num_data, 1)).T).T
+                dL_dpsi2 = None
+        else:
+            dL_dpsi2 = likelihood.precision * dL_dpsi2_beta
+            if has_uncertain_inputs:
+                # repeat for each of the N psi_2 matrices
+                dL_dpsi2 = np.repeat(dL_dpsi2[None, :, :], num_data, axis=0)
+            else:
+                # subsume back into psi1 (==Kmn)
+                dL_dpsi1 += 2.*np.dot(psi1, dL_dpsi2)
+                dL_dpsi2 = None
+
+        # the partial derivative vector for the likelihood
+        if likelihood.size == 0:
+            # save computation here.
+            partial_for_likelihood = None
+        elif likelihood.is_heteroscedastic:
+            if has_uncertain_inputs:
+                raise NotImplementedError, "heteroscedastic derivatives with uncertain inputs not implemented"
+            else:
+                LBi = chol_inv(LB)
+                Lmi_psi1, nil = dtrtrs(_Lm, np.asfortranarray(psi1.T), lower=1, trans=0)
+                _LBi_Lmi_psi1, _ = dtrtrs(LB, np.asfortranarray(Lmi_psi1), lower=1, trans=0)
+
+                partial_for_likelihood = -0.5 * likelihood.precision + 0.5 * likelihood.V**2
+                partial_for_likelihood += 0.5 * output_dim * (psi0 - np.sum(Lmi_psi1**2, 0))[:, None] * likelihood.precision**2
+                partial_for_likelihood += 0.5 * np.sum(mdot(LBi.T, LBi, Lmi_psi1) * Lmi_psi1, 0)[:, None] * likelihood.precision**2
+                partial_for_likelihood += -np.dot(_LBi_Lmi_psi1Vf.T, _LBi_Lmi_psi1).T * likelihood.Y * likelihood.precision**2
+                partial_for_likelihood += 0.5 * np.dot(_LBi_Lmi_psi1Vf.T, _LBi_Lmi_psi1).T**2 * likelihood.precision**2
+        else:
+            # likelihood is not heteroscedastic
+            partial_for_likelihood = -0.5 * num_data * output_dim * likelihood.precision + 0.5 * likelihood.trYYT * likelihood.precision ** 2
+            partial_for_likelihood += 0.5 * output_dim * (psi0.sum() * likelihood.precision ** 2 - np.trace(A) * likelihood.precision)
+            partial_for_likelihood += likelihood.precision * (0.5 * np.sum(A * DBi_plus_BiPBi) - data_fit)
+
+        # compute the (lower bound on the) log marginal likelihood; the four terms
+        # get fresh names so as not to clobber the matrices A and B computed above
+        if likelihood.is_heteroscedastic:
+            lik_1 = -0.5 * num_data * output_dim * np.log(2.*np.pi) + 0.5 * np.sum(np.log(likelihood.precision)) - 0.5 * np.sum(likelihood.V * likelihood.Y)
+            lik_2 = -0.5 * output_dim * (np.sum(likelihood.precision.flatten() * psi0) - np.trace(A))
+        else:
+            lik_1 = -0.5 * num_data * output_dim * (np.log(2.*np.pi) - np.log(likelihood.precision)) - 0.5 * likelihood.precision * likelihood.trYYT
+            lik_2 = -0.5 * output_dim * (np.sum(likelihood.precision * psi0) - np.trace(A))
+        lik_3 = -output_dim * (np.sum(np.log(np.diag(LB)))) # + 0.5 * num_inducing * np.log(sf2)
+        lik_4 = 0.5 * data_fit
+        log_marginal = lik_1 + lik_2 + lik_3 + lik_4 + likelihood.Z
+
+        # TODO: also construct and return a Posterior object (and the gradients
+        # computed above), as promised in the class docstring.
+        return log_marginal
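
Note: the scalar returned by DTCVar.inference above is the variational lower bound
on the log marginal likelihood for sparse GP regression, i.e. the usual DTC Gaussian
terms plus the trace correction -0.5*beta*(sum(psi0) - trace(A)), in the spirit of
Titsias (2009). Below is a minimal sketch of how the new class might be driven once
merged. It is illustrative only and not part of this patch: the GPy.kern.rbf and
GPy.likelihoods.Gaussian constructors are assumed from the 2013-era API, and the
likelihood object is assumed to expose the precision, trYYT, VVT_factor and Z
attributes used in the code above.

    import numpy as np
    import GPy
    from GPy.inference.latent_function_inference.dtcvar import DTCVar

    # toy 1-d regression data and a handful of inducing inputs
    X = np.random.rand(100, 1)
    Y = np.sin(3. * X) + 0.05 * np.random.randn(100, 1)
    Z = np.random.rand(10, 1)

    kern = GPy.kern.rbf(1)                    # assumed 2013-era kernel constructor
    Kmm = kern.K(Z)                           # covariance among inducing inputs (M x M)
    Kmn = kern.K(Z, X)                        # cross-covariance (M x N)
    Knn_diag = kern.Kdiag(X)                  # diagonal of the data covariance (N,)

    likelihood = GPy.likelihoods.Gaussian(Y)  # assumed likelihood wrapper
    bound = DTCVar().inference(Kmm, Kmn, Knn_diag, likelihood, Y)
    print bound                               # lower bound on the log marginal likelihood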