diff --git a/GPy/core/gp.py b/GPy/core/gp.py
index fbabf5f6..060b617a 100644
--- a/GPy/core/gp.py
+++ b/GPy/core/gp.py
@@ -48,9 +48,9 @@ class GP(Model):
         if inference_method is None:
             if isinstance(likelihood, likelihoods.Gaussian):
                 inference_method = exact_gaussian_inference.ExactGaussianInference()
-            else:
-                inference_method = expectation_propagation
-            print "defaulting to ", inference_method, "for latent function inference"
+            else:
+                inference_method = expectation_propagation
+            print "defaulting to ", inference_method, "for latent function inference"
         self.inference_method = inference_method

         self.add_parameter(self.kern)
diff --git a/GPy/core/sparse_gp.py b/GPy/core/sparse_gp.py
index e33703d8..3152d4b5 100644
--- a/GPy/core/sparse_gp.py
+++ b/GPy/core/sparse_gp.py
@@ -6,6 +6,7 @@ from ..util.linalg import mdot, tdot, symmetrify, backsub_both_sides, chol_inv,
 from gp import GP
 from parameterization.param import Param
 from ..inference.latent_function_inference import varDTC
+from .. import likelihoods

 class SparseGP(GP):
     """
@@ -35,24 +36,21 @@ class SparseGP(GP):
         #pick a sensible inference method
         if inference_method is None:
             if isinstance(likelihood, likelihoods.Gaussian):
-                inference_method = varDTC.Gaussian_inference()
+                inference_method = varDTC.VarDTC()
             else:
                 #inference_method = ??
                 raise NotImplementedError, "what to do what to do?"
             print "defaulting to ", inference_method, "for latent function inference"

-        GP.__init__(self, X, Y, likelihood, inference_method, kernel, name)
         self.Z = Z
         self.num_inducing = Z.shape[0]

-        if X_variance is None:
-            self.has_uncertain_inputs = False
-            self.X_variance = None
-        else:
+        if X_variance is not None:
             assert X_variance.shape == X.shape
-            self.has_uncertain_inputs = True
-            self.X_variance = X_variance
+            self.X_variance = X_variance
+
+        GP.__init__(self, X, Y, kernel, likelihood, inference_method=inference_method, name=name)

         self.Z = Param('inducing inputs', self.Z)
         self.add_parameter(self.Z, gradient=self.dL_dZ, index=0)
diff --git a/GPy/inference/latent_function_inference/varDTC.py b/GPy/inference/latent_function_inference/varDTC.py
index 0cd709b2..7ceeff11 100644
--- a/GPy/inference/latent_function_inference/varDTC.py
+++ b/GPy/inference/latent_function_inference/varDTC.py
@@ -2,7 +2,7 @@
 # Licensed under the BSD 3-clause license (see LICENSE.txt)

 from posterior import Posterior
-from ...util.linalg import pdinv, dpotrs, tdot
+from ...util.linalg import jitchol, backsub_both_sides, tdot, dtrtrs
 import numpy as np
 log_2_pi = np.log(2*np.pi)

@@ -17,7 +17,7 @@ class VarDTC(object):
     """
     def __init__(self):
-        self._YYTfactor_cache = caching.cache()
+        #self._YYTfactor_cache = caching.cache()
         self.const_jitter = 1e-6

     def get_YYTfactor(self, Y):
@@ -42,7 +42,13 @@ class VarDTC(object):
         num_data, output_dim = Y.shape

         #see whether we're using variational uncertain inputs
-        uncertain_inputs = (X_variance is None)
+        uncertain_inputs = X_variance is not None
+
+        #see whether we've got a different noise variance for each datum
+        beta = 1./np.squeeze(likelihood.variance)
+        het_noise = False
+        if beta.size > 1:
+            het_noise = True

         # kernel computations, using BGPLVM notation
         Kmm = kern.K(Z)
@@ -59,7 +65,7 @@ class VarDTC(object):

         # The rather complex computations of A
         if uncertain_inputs:
-            if likelihood.is_heteroscedastic:
+            if het_noise:
                 psi2_beta = (psi2 * (likelihood.precision.flatten().reshape(num_data, 1, 1))).sum(0)
             else:
                 psi2_beta = psi2.sum(0) * likelihood.precision
@@ -70,10 +76,10 @@ class VarDTC(object):
             tmp = evecs * np.sqrt(clipped_evals)
             tmp = tmp.T
         else:
-            if likelihood.is_heteroscedastic:
-                tmp = psi1 * (np.sqrt(likelihood.precision.flatten().reshape(num_data, 1)))
+            if het_noise:
+                tmp = psi1 * (np.sqrt(beta.reshape(num_data, 1)))
             else:
-                tmp = psi1 * (np.sqrt(likelihood.precision))
+                tmp = psi1 * (np.sqrt(beta))
             tmp, _ = dtrtrs(Lm, np.asfortranarray(tmp.T), lower=1)
             A = tdot(tmp)
@@ -82,7 +88,7 @@ class VarDTC(object):
         LB = jitchol(B)

         # VVT_factor is a matrix such that tdot(VVT_factor) = VVT...this is for efficiency!
-        VVT_factor = self.get_VVTfactor(Y, likelihood.precision)
+        VVT_factor = self.get_VVTfactor(Y, beta)
         psi1Vf = np.dot(psi1.T, VVT_factor)

         # back substutue C into psi1Vf
@@ -92,12 +98,12 @@ class VarDTC(object):
         Cpsi1Vf, info3 = dtrtrs(Lm, tmp, lower=1, trans=1)

         #compute log marginal likelihood
-        if likelihood.is_heteroscedastic:
-            A = -0.5 * num_data * output_dim * np.log(2.*np.pi) + 0.5 * np.sum(np.log(likelihood.precision)) - 0.5 * np.sum(likelihood.V * likelihood.Y)
-            B = -0.5 * output_dim * (np.sum(likelihood.precision.flatten() * psi0) - np.trace(_A))
+        if het_noise:
+            A = -0.5 * num_data * output_dim * np.log(2.*np.pi) + 0.5 * np.sum(np.log(beta)) - 0.5 * np.sum(likelihood.V * likelihood.Y)
+            B = -0.5 * output_dim * (np.sum(beta * psi0) - np.trace(_A))
         else:
-            A = -0.5 * num_data * output_dim * (np.log(2.*np.pi) - np.log(likelihood.precision)) - 0.5 * likelihood.precision * likelihood.trYYT
-            B = -0.5 * output_dim * (np.sum(likelihood.precision * psi0) - np.trace(_A))
+            A = -0.5 * num_data * output_dim * (np.log(2.*np.pi) - np.log(beta)) - 0.5 * beta * likelihood.trYYT
+            B = -0.5 * output_dim * (np.sum(beta * psi0) - np.trace(_A))
         C = -output_dim * (np.sum(np.log(np.diag(LB)))) # + 0.5 * num_inducing * np.log(sf2))
         D = 0.5 * data_fit
         log_marginal = A + B + C + D
@@ -112,21 +118,20 @@ class VarDTC(object):
             tmp += output_dim * np.eye(num_inducing)
         dL_dKmm = backsub_both_sides(Lm, tmp)

-        # Compute dL_dpsi # FIXME: this is untested for the heterscedastic + uncertain inputs case
-        dL_dpsi0 = -0.5 * output_dim * (likelihood.precision * np.ones([num_data, 1])).flatten()
+        # Compute dL_dpsi
+        dL_dpsi0 = -0.5 * output_dim * (beta * np.ones([num_data, 1])).flatten()
         dL_dpsi1 = np.dot(likelihood.VVT_factor, Cpsi1Vf.T)
         dL_dpsi2_beta = 0.5 * backsub_both_sides(Lm, output_dim * np.eye(num_inducing) - DBi_plus_BiPBi)
-        if likelihood.is_heteroscedastic:
-
-            if has_uncertain_inputs:
-                dL_dpsi2 = likelihood.precision.flatten()[:, None, None] * dL_dpsi2_beta[None, :, :]
+        if het_noise:
+            if uncertain_inputs:
+                dL_dpsi2 = beta[:, None, None] * dL_dpsi2_beta[None, :, :]
             else:
                 dL_dpsi1 += 2.*np.dot(dL_dpsi2_beta, (psi1 * likelihood.precision.reshape(num_data, 1)).T).T
                 dL_dpsi2 = None
         else:
             dL_dpsi2 = likelihood.precision * dL_dpsi2_beta
-            if has_uncertain_inputs:
+            if uncertain_inputs:
                 # repeat for each of the N psi_2 matrices
                 dL_dpsi2 = np.repeat(dL_dpsi2[None, :, :], num_data, axis=0)
             else:
@@ -139,18 +144,14 @@ class VarDTC(object):
         if likelihood.size == 0:
             # save computation here.
             partial_for_likelihood = None
-        elif likelihood.is_heteroscedastic:
-
-            if has_uncertain_inputs:
+        elif het_noise:
+            if uncertain_inputs:
                 raise NotImplementedError, "heteroscedatic derivates with uncertain inputs not implemented"
-            else:
-                LBi = chol_inv(LB)
                 Lmi_psi1, nil = dtrtrs(Lm, np.asfortranarray(psi1.T), lower=1, trans=0)
                 _LBi_Lmi_psi1, _ = dtrtrs(LB, np.asfortranarray(Lmi_psi1), lower=1, trans=0)
-
                 partial_for_likelihood = -0.5 * likelihood.precision + 0.5 * likelihood.V**2
                 partial_for_likelihood += 0.5 * output_dim * (psi0 - np.sum(Lmi_psi1**2,0))[:,None] * likelihood.precision**2
@@ -165,15 +166,26 @@ class VarDTC(object):
             partial_for_likelihood += 0.5 * output_dim * (psi0.sum() * likelihood.precision ** 2 - np.trace(_A) * likelihood.precision)
             partial_for_likelihood += likelihood.precision * (0.5 * np.sum(_A * DBi_plus_BiPBi) - data_fit)

+        #put the gradients in the right places
+        likelihood.update_gradients(np.diag(partial_for_likelihood))
+
+        if uncertain_inputs:
+            kern.update_gradients_variational(??)
+            grad_dict = {'dL_dKmm': dL_dKmm, 'dL_dpsi0':dL_dpsi0, 'dL_dpsi1':dL_dpsi1, 'dL_dpsi2':dL_dpsi2}
+        else:
+            kern.update_gradients_sparse(??)
+            grad_dict = {'dL_dKmm': dL_dKmm, 'dL_dpsi0':dL_dpsi0, 'dL_dpsi1':dL_dpsi1, 'dL_dpsi2':dL_dpsi2}

         #get sufficient things for posterior prediction
         if VVT_factor.shape[1] == Y.shape[1]:
-            Cpsi1V = Cpsi1Vf
+            woodbury_vector = Cpsi1Vf # == Cpsi1V
+            woodbury_chol = ??
         else:
             raise NotImplementedError #TODO

         #construct a posterior object
-        post = Posterior(woodbury_chol=None, woodbury_vector=Cpsi1V, K=None, mean=None, cov=None, K_chol=None)
-        return
+        post = Posterior(woodbury_chol=woodbury_chol, woodbury_vector=woodbury_vector, K=Kmm, mean=None, cov=None, K_chol=Lm)
+
+        return post, log_marginal, grad_dict
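
Notes on the patch:

sparse_gp.py replaces the `self.has_uncertain_inputs` flag with a test on
`X_variance`, but the rewritten branch only assigns `self.X_variance` when an
input variance is given, leaving the attribute undefined for certain inputs.
If downstream code distinguishes the two cases by testing
`self.X_variance is None` (as the inference code now does with `X_variance`),
a minimal sketch of the branch with the attribute always defined would be:

    if X_variance is not None:
        assert X_variance.shape == X.shape
        self.X_variance = X_variance
    else:
        # keep the attribute defined so callers can test `is None`
        self.X_variance = None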
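varDTC.py detects per-datum noise by squeezing the likelihood variance:
`np.squeeze` turns a single shared variance into a 0-d array of size 1 and a
per-datum variance column into a length-N vector, so `beta.size > 1` is
exactly the heteroscedastic case. A small self-contained check of that
behaviour (the function name here is illustrative, not GPy API):

    import numpy as np

    def is_heteroscedastic(variance):
        # one shared variance -> size 1; one variance per datum -> size N
        beta = 1. / np.squeeze(variance)
        return beta.size > 1

    print is_heteroscedastic(np.array([[0.1]]))          # False: homoscedastic
    print is_heteroscedastic(np.array([[0.1], [0.2]]))   # True: per-datum noise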
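On the comment "VVT_factor is a matrix such that tdot(VVT_factor) = VVT":
`tdot(A)` in GPy.util.linalg computes the symmetric product A A^T. In the
homoscedastic Gaussian case, where V = beta * Y, the factor can presumably be
beta * Y itself, since tdot(beta * Y) = beta**2 * Y Y^T = V V^T, and it has
only output_dim columns rather than num_data, which is the efficiency win. A
quick numerical sanity check, with a local stand-in for tdot:

    import numpy as np

    def tdot(A):
        # same contract as GPy.util.linalg.tdot: A A^T
        return np.dot(A, A.T)

    beta = 2.5
    Y = np.random.randn(4, 2)
    V = beta * Y
    print np.allclose(tdot(beta * Y), np.dot(V, V.T))   # True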
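The `woodbury_chol = ??` placeholder still needs the Cholesky factor that,
together with `woodbury_vector`, lets the Posterior object make predictions
without redoing the inference algebra. As a hedged sketch of how such a pair
is typically consumed at predict time (the helper names are illustrative, not
GPy's actual Posterior internals): the predictive mean is K_{*,Z} times the
woodbury vector, and the variance is reduced by a term built from the
woodbury Cholesky.

    import numpy as np

    def predict_mean(Kxz, woodbury_vector):
        # posterior mean of f* at test inputs: K_{*,Z} dot woodbury_vector
        return np.dot(Kxz, woodbury_vector)

    def predict_var_diag(Kxx_diag, Kxz, woodbury_chol):
        # posterior variance of f*: diag(K_**) - diag(K_{*,Z} W K_{Z,*}),
        # where W = L^{-T} L^{-1} for lower-triangular woodbury_chol L
        tmp = np.linalg.solve(woodbury_chol, Kxz.T)
        return Kxx_diag - np.sum(np.square(tmp), axis=0)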