From 58a05f37b768bd7e0ce4c861f9685a1f6c277e8e Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Thu, 15 May 2014 14:06:00 +0100 Subject: [PATCH 1/4] [latentfunctioninference] superclass LatentFunctionInference added, which contains a call just before and just after optimization --- GPy/core/gp.py | 20 ++++++++++- GPy/core/model.py | 2 +- .../latent_function_inference/__init__.py | 35 +++++++++++++++++-- .../latent_function_inference/dtc.py | 3 +- .../exact_gaussian_inference.py | 3 +- .../expectation_propagation.py | 3 +- .../latent_function_inference/fitc.py | 3 +- .../latent_function_inference/laplace.py | 3 +- .../latent_function_inference/var_dtc.py | 5 +-- .../latent_function_inference/var_dtc_gpu.py | 3 +- .../var_dtc_parallel.py | 3 +- 11 files changed, 69 insertions(+), 14 deletions(-) diff --git a/GPy/core/gp.py b/GPy/core/gp.py index 62e16de1..29d9032a 100644 --- a/GPy/core/gp.py +++ b/GPy/core/gp.py @@ -10,7 +10,7 @@ from model import Model from parameterization import ObsAr from .. import likelihoods from ..likelihoods.gaussian import Gaussian -from ..inference.latent_function_inference import exact_gaussian_inference, expectation_propagation +from ..inference.latent_function_inference import exact_gaussian_inference, expectation_propagation, LatentFunctionInference from parameterization.variational import VariationalPosterior class GP(Model): @@ -21,6 +21,7 @@ class GP(Model): :param Y: output observations :param kernel: a GPy kernel, defaults to rbf+white :param likelihood: a GPy likelihood + :param :class:`~GPy.inference.latent_function_inference.LatentFunctionInference` inference_method: The inference method to use for this GP :rtype: model object .. Note:: Multiple independent outputs are allowed using columns of Y @@ -220,3 +221,20 @@ class GP(Model): """ return self.kern.input_sensitivity() + def optimize(self, optimizer=None, start=None, **kwargs): + """ + Optimize the model using self.log_likelihood and self.log_likelihood_gradient, as well as self.priors. + kwargs are passed to the optimizer. They can be: + + :param max_f_eval: maximum number of function evaluations + :type max_f_eval: int + :messages: whether to display during optimisation + :type messages: bool + :param optimizer: which optimizer to use (defaults to self.preferred optimizer) + :type optimizer: string + + TODO: valid args + """ + self.inference_method.on_optimization_start() + super(GP, self).optimize(optimizer, start, **kwargs) + self.inference_method.on_optimization_end() \ No newline at end of file diff --git a/GPy/core/model.py b/GPy/core/model.py index 38e8d4cf..71b21af6 100644 --- a/GPy/core/model.py +++ b/GPy/core/model.py @@ -220,7 +220,7 @@ class Model(Parameterized): if self.is_fixed: raise RuntimeError, "Cannot optimize, when everything is fixed" if self.size == 0: - raise RuntimeError, "Model without parameters cannot be minimized" + raise RuntimeError, "Model without parameters cannot be optimized" if optimizer is None: optimizer = self.preferred_optimizer diff --git a/GPy/inference/latent_function_inference/__init__.py b/GPy/inference/latent_function_inference/__init__.py index 68004a08..878c1e4f 100644 --- a/GPy/inference/latent_function_inference/__init__.py +++ b/GPy/inference/latent_function_inference/__init__.py @@ -25,6 +25,20 @@ etc. """ +class LatentFunctionInference(object): + def on_optimization_start(self): + """ + This function gets called, just before the optimization loop to start. + """ + pass + + def on_optimization_end(self): + """ + This function gets called, just after the optimization loop ended. + """ + pass + + from exact_gaussian_inference import ExactGaussianInference from laplace import Laplace from GPy.inference.latent_function_inference.var_dtc import VarDTC @@ -38,11 +52,26 @@ from var_dtc_gpu import VarDTC_GPU # class FullLatentFunctionData(object): # # -# class LatentFunctionInference(object): -# def inference(self, kern, X, likelihood, Y, Y_metadata=None): + +# class EMLikeLatentFunctionInference(LatentFunctionInference): +# def update_approximation(self): +# """ +# This function gets called when the +# """ +# +# def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None): # """ # Do inference on the latent functions given a covariance function `kern`, -# inputs and outputs `X` and `Y`, and a likelihood `likelihood`. +# inputs and outputs `X` and `Y`, inducing_inputs `Z`, and a likelihood `likelihood`. +# Additional metadata for the outputs `Y` can be given in `Y_metadata`. +# """ +# raise NotImplementedError, "Abstract base class for full inference" +# +# class VariationalLatentFunctionInference(LatentFunctionInference): +# def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None): +# """ +# Do inference on the latent functions given a covariance function `kern`, +# inputs and outputs `X` and `Y`, inducing_inputs `Z`, and a likelihood `likelihood`. # Additional metadata for the outputs `Y` can be given in `Y_metadata`. # """ # raise NotImplementedError, "Abstract base class for full inference" diff --git a/GPy/inference/latent_function_inference/dtc.py b/GPy/inference/latent_function_inference/dtc.py index 1a84da6b..1b6b1dbd 100644 --- a/GPy/inference/latent_function_inference/dtc.py +++ b/GPy/inference/latent_function_inference/dtc.py @@ -4,9 +4,10 @@ from posterior import Posterior from ...util.linalg import jitchol, tdot, dtrtrs, dpotri, pdinv import numpy as np +from . import LatentFunctionInference log_2_pi = np.log(2*np.pi) -class DTC(object): +class DTC(LatentFunctionInference): """ An object for inference when the likelihood is Gaussian, but we want to do sparse inference. diff --git a/GPy/inference/latent_function_inference/exact_gaussian_inference.py b/GPy/inference/latent_function_inference/exact_gaussian_inference.py index c0177e9f..0c02efe3 100644 --- a/GPy/inference/latent_function_inference/exact_gaussian_inference.py +++ b/GPy/inference/latent_function_inference/exact_gaussian_inference.py @@ -5,10 +5,11 @@ from posterior import Posterior from ...util.linalg import pdinv, dpotrs, tdot from ...util import diag import numpy as np +from . import LatentFunctionInference log_2_pi = np.log(2*np.pi) -class ExactGaussianInference(object): +class ExactGaussianInference(LatentFunctionInference): """ An object for inference when the likelihood is Gaussian. diff --git a/GPy/inference/latent_function_inference/expectation_propagation.py b/GPy/inference/latent_function_inference/expectation_propagation.py index 172f43fb..c2dea824 100644 --- a/GPy/inference/latent_function_inference/expectation_propagation.py +++ b/GPy/inference/latent_function_inference/expectation_propagation.py @@ -1,9 +1,10 @@ import numpy as np from ...util.linalg import pdinv,jitchol,DSYR,tdot,dtrtrs, dpotrs from posterior import Posterior +from . import LatentFunctionInference log_2_pi = np.log(2*np.pi) -class EP(object): +class EP(LatentFunctionInference): def __init__(self, epsilon=1e-6, eta=1., delta=1.): """ The expectation-propagation algorithm. diff --git a/GPy/inference/latent_function_inference/fitc.py b/GPy/inference/latent_function_inference/fitc.py index de47e5d5..a184c6c4 100644 --- a/GPy/inference/latent_function_inference/fitc.py +++ b/GPy/inference/latent_function_inference/fitc.py @@ -5,9 +5,10 @@ from posterior import Posterior from ...util.linalg import jitchol, tdot, dtrtrs, dpotri, pdinv from ...util import diag import numpy as np +from . import LatentFunctionInference log_2_pi = np.log(2*np.pi) -class FITC(object): +class FITC(LatentFunctionInference): """ An object for inference when the likelihood is Gaussian, but we want to do sparse inference. diff --git a/GPy/inference/latent_function_inference/laplace.py b/GPy/inference/latent_function_inference/laplace.py index 9ba3f83f..1c153518 100644 --- a/GPy/inference/latent_function_inference/laplace.py +++ b/GPy/inference/latent_function_inference/laplace.py @@ -16,8 +16,9 @@ from ...util.misc import param_to_array from posterior import Posterior import warnings from scipy import optimize +from . import LatentFunctionInference -class Laplace(object): +class Laplace(LatentFunctionInference): def __init__(self): """ diff --git a/GPy/inference/latent_function_inference/var_dtc.py b/GPy/inference/latent_function_inference/var_dtc.py index 0cc841ed..3043a7e8 100644 --- a/GPy/inference/latent_function_inference/var_dtc.py +++ b/GPy/inference/latent_function_inference/var_dtc.py @@ -7,9 +7,10 @@ from ...util import diag from ...core.parameterization.variational import VariationalPosterior import numpy as np from ...util.misc import param_to_array +from . import LatentFunctionInference log_2_pi = np.log(2*np.pi) -class VarDTC(object): +class VarDTC(LatentFunctionInference): """ An object for inference when the likelihood is Gaussian, but we want to do sparse inference. @@ -190,7 +191,7 @@ class VarDTC(object): post = Posterior(woodbury_inv=woodbury_inv, woodbury_vector=woodbury_vector, K=Kmm, mean=None, cov=None, K_chol=Lm) return post, log_marginal, grad_dict -class VarDTCMissingData(object): +class VarDTCMissingData(LatentFunctionInference): const_jitter = 1e-6 def __init__(self, limit=1, inan=None): from ...util.caching import Cacher diff --git a/GPy/inference/latent_function_inference/var_dtc_gpu.py b/GPy/inference/latent_function_inference/var_dtc_gpu.py index 9b2da1c9..d346d01f 100644 --- a/GPy/inference/latent_function_inference/var_dtc_gpu.py +++ b/GPy/inference/latent_function_inference/var_dtc_gpu.py @@ -7,6 +7,7 @@ from ...util import diag from ...core.parameterization.variational import VariationalPosterior import numpy as np from ...util.misc import param_to_array +from . import LatentFunctionInference log_2_pi = np.log(2*np.pi) from ...util import gpu_init @@ -19,7 +20,7 @@ try: except: pass -class VarDTC_GPU(object): +class VarDTC_GPU(LatentFunctionInference): """ An object for inference when the likelihood is Gaussian, but we want to do sparse inference. diff --git a/GPy/inference/latent_function_inference/var_dtc_parallel.py b/GPy/inference/latent_function_inference/var_dtc_parallel.py index 87236e2a..ffda0aba 100644 --- a/GPy/inference/latent_function_inference/var_dtc_parallel.py +++ b/GPy/inference/latent_function_inference/var_dtc_parallel.py @@ -7,9 +7,10 @@ from ...util import diag from ...core.parameterization.variational import VariationalPosterior import numpy as np from ...util.misc import param_to_array +from . import LatentFunctionInference log_2_pi = np.log(2*np.pi) -class VarDTC_minibatch(object): +class VarDTC_minibatch(LatentFunctionInference): """ An object for inference when the likelihood is Gaussian, but we want to do sparse inference. From 75d5209b98ff4e275eab2a690acece327f4ec4c7 Mon Sep 17 00:00:00 2001 From: Ricardo Date: Thu, 15 May 2014 14:10:24 +0100 Subject: [PATCH 2/4] minor changes --- .../expectation_propagation_dtc.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/GPy/inference/latent_function_inference/expectation_propagation_dtc.py b/GPy/inference/latent_function_inference/expectation_propagation_dtc.py index 3625a5bf..be1efd3a 100644 --- a/GPy/inference/latent_function_inference/expectation_propagation_dtc.py +++ b/GPy/inference/latent_function_inference/expectation_propagation_dtc.py @@ -5,7 +5,13 @@ from posterior import Posterior log_2_pi = np.log(2*np.pi) class EPDTC(EP): - #def __init__(self, epsilon=1e-6, eta=1., delta=1.): + def __init__(self, epsilon=1e-6, eta=1., delta=1.): + self.epsilon, self.eta, self.delta = epsilon, eta, delta + self.reset() + + def reset(self): + self.old_mutilde, self.old_vtilde = None, None + self._ep_approximation = None def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None): num_data, output_dim = X.shape @@ -20,8 +26,10 @@ class EPDTC(EP): KmmiKmn = np.dot(Kmmi,Kmn) K = np.dot(Kmn.T,KmmiKmn) - - mu, Sigma, mu_tilde, tau_tilde, Z_hat = self.expectation_propagation(Kmm, Kmn, Y, likelihood, Y_metadata) + if self._ep_approximation is None: + mu, Sigma, mu_tilde, tau_tilde, Z_hat = self._ep_approximation = self.expectation_propagation(Kmm, Kmn, Y, likelihood, Y_metadata) + else: + mu, Sigma, mu_tilde, tau_tilde, Z_hat = self._ep_approximation Wi, LW, LWi, W_logdet = pdinv(K + np.diag(1./tau_tilde)) From bd7a80dde5ce80585458bd40316badefc28f704c Mon Sep 17 00:00:00 2001 From: Ricardo Date: Thu, 15 May 2014 14:11:11 +0100 Subject: [PATCH 3/4] flags added --- .../latent_function_inference/expectation_propagation.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/GPy/inference/latent_function_inference/expectation_propagation.py b/GPy/inference/latent_function_inference/expectation_propagation.py index 172f43fb..603ff68c 100644 --- a/GPy/inference/latent_function_inference/expectation_propagation.py +++ b/GPy/inference/latent_function_inference/expectation_propagation.py @@ -21,6 +21,7 @@ class EP(object): def reset(self): self.old_mutilde, self.old_vtilde = None, None + self._ep_approximation = None def inference(self, kern, X, likelihood, Y, Y_metadata=None, Z=None): num_data, output_dim = X.shape @@ -28,7 +29,10 @@ class EP(object): K = kern.K(X) - mu, Sigma, mu_tilde, tau_tilde, Z_hat = self.expectation_propagation(K, Y, likelihood, Y_metadata) + if self._ep_approximation is None: + mu, Sigma, mu_tilde, tau_tilde, Z_hat = self._ep_approximation = self.expectation_propagation(K, Y, likelihood, Y_metadata) + else: + mu, Sigma, mu_tilde, tau_tilde, Z_hat = self._ep_approximation Wi, LW, LWi, W_logdet = pdinv(K + np.diag(1./tau_tilde)) From 3d76664af054eac7a53e1a9fffeb3cf800330f1e Mon Sep 17 00:00:00 2001 From: Ricardo Date: Thu, 15 May 2014 16:36:03 +0100 Subject: [PATCH 4/4] EP is back. --- GPy/examples/classification.py | 9 +- .../expectation_propagation.py | 10 +- .../expectation_propagation_dtc.py | 243 ++++++++++++++++-- GPy/likelihoods/bernoulli.py | 3 + 4 files changed, 238 insertions(+), 27 deletions(-) diff --git a/GPy/examples/classification.py b/GPy/examples/classification.py index 2b0a201d..ae9d8eb8 100644 --- a/GPy/examples/classification.py +++ b/GPy/examples/classification.py @@ -96,15 +96,11 @@ def toy_linear_1d_classification_laplace(seed=default_seed, optimize=True, plot= # Optimize if optimize: - #m.update_likelihood_approximation() - # Parameters optimization: try: m.optimize('scg', messages=1) except Exception as e: return m - #m.pseudo_EM() - # Plot if plot: fig, axes = pb.subplots(2, 1) @@ -133,10 +129,7 @@ def sparse_toy_linear_1d_classification(num_inducing=10, seed=default_seed, opti # Optimize if optimize: - #m.update_likelihood_approximation() - # Parameters optimization: - #m.optimize() - m.pseudo_EM() + m.optimize() # Plot if plot: diff --git a/GPy/inference/latent_function_inference/expectation_propagation.py b/GPy/inference/latent_function_inference/expectation_propagation.py index 870f2310..de8b0931 100644 --- a/GPy/inference/latent_function_inference/expectation_propagation.py +++ b/GPy/inference/latent_function_inference/expectation_propagation.py @@ -24,6 +24,13 @@ class EP(LatentFunctionInference): self.old_mutilde, self.old_vtilde = None, None self._ep_approximation = None + def on_optimization_start(self): + self._ep_approximation = None + + def on_optimization_end(self): + # TODO: update approximation in the end as well? Maybe even with a switch? + pass + def inference(self, kern, X, likelihood, Y, Y_metadata=None, Z=None): num_data, output_dim = X.shape assert output_dim ==1, "ep in 1D only (for now!)" @@ -47,8 +54,6 @@ class EP(LatentFunctionInference): return Posterior(woodbury_inv=Wi, woodbury_vector=alpha, K=K), log_marginal, {'dL_dK':dL_dK, 'dL_dthetaL':dL_dthetaL} - - def expectation_propagation(self, K, Y, likelihood, Y_metadata): num_data, data_dim = Y.shape @@ -113,4 +118,3 @@ class EP(LatentFunctionInference): mu_tilde = v_tilde/tau_tilde return mu, Sigma, mu_tilde, tau_tilde, Z_hat - diff --git a/GPy/inference/latent_function_inference/expectation_propagation_dtc.py b/GPy/inference/latent_function_inference/expectation_propagation_dtc.py index be1efd3a..85d8cc89 100644 --- a/GPy/inference/latent_function_inference/expectation_propagation_dtc.py +++ b/GPy/inference/latent_function_inference/expectation_propagation_dtc.py @@ -1,14 +1,56 @@ import numpy as np -from ...util.linalg import pdinv,jitchol,DSYR,tdot,dtrtrs, dpotrs -from expectation_propagation import EP +from ...util import diag +from ...util.linalg import mdot, jitchol, backsub_both_sides, tdot, dtrtrs, dtrtri, dpotri, dpotrs, symmetrify, DSYR +from ...util.misc import param_to_array +from ...core.parameterization.variational import VariationalPosterior +from . import LatentFunctionInference from posterior import Posterior log_2_pi = np.log(2*np.pi) -class EPDTC(EP): - def __init__(self, epsilon=1e-6, eta=1., delta=1.): +class EPDTC(LatentFunctionInference): + const_jitter = 1e-6 + def __init__(self, epsilon=1e-6, eta=1., delta=1., limit=1): + from ...util.caching import Cacher + self.limit = limit + self.get_trYYT = Cacher(self._get_trYYT, limit) + self.get_YYTfactor = Cacher(self._get_YYTfactor, limit) + self.epsilon, self.eta, self.delta = epsilon, eta, delta self.reset() + def set_limit(self, limit): + self.get_trYYT.limit = limit + self.get_YYTfactor.limit = limit + + def _get_trYYT(self, Y): + return param_to_array(np.sum(np.square(Y))) + + def __getstate__(self): + # has to be overridden, as Cacher objects cannot be pickled. + return self.limit + + def __setstate__(self, state): + # has to be overridden, as Cacher objects cannot be pickled. + self.limit = state + from ...util.caching import Cacher + self.get_trYYT = Cacher(self._get_trYYT, self.limit) + self.get_YYTfactor = Cacher(self._get_YYTfactor, self.limit) + + def _get_YYTfactor(self, Y): + """ + find a matrix L which satisfies LLT = YYT. + + Note that L may have fewer columns than Y. + """ + N, D = Y.shape + if (N>=D): + return param_to_array(Y) + else: + return jitchol(tdot(Y)) + + def get_VVTfactor(self, Y, prec): + return Y * prec # TODO chache this, and make it effective + def reset(self): self.old_mutilde, self.old_vtilde = None, None self._ep_approximation = None @@ -20,28 +62,131 @@ class EPDTC(EP): Kmm = kern.K(Z) Kmn = kern.K(Z,X) - Lm = jitchol(Kmm) - Lmi = dtrtrs(Lm,np.eye(Lm.shape[0]))[0] - Kmmi = np.dot(Lmi.T,Lmi) - KmmiKmn = np.dot(Kmmi,Kmn) - K = np.dot(Kmn.T,KmmiKmn) - if self._ep_approximation is None: mu, Sigma, mu_tilde, tau_tilde, Z_hat = self._ep_approximation = self.expectation_propagation(Kmm, Kmn, Y, likelihood, Y_metadata) else: mu, Sigma, mu_tilde, tau_tilde, Z_hat = self._ep_approximation - Wi, LW, LWi, W_logdet = pdinv(K + np.diag(1./tau_tilde)) - alpha, _ = dpotrs(LW, mu_tilde, lower=1) + if isinstance(X, VariationalPosterior): + uncertain_inputs = True + psi0 = kern.psi0(Z, X) + psi1 = Kmn.T#kern.psi1(Z, X) + psi2 = kern.psi2(Z, X) + else: + uncertain_inputs = False + psi0 = kern.Kdiag(X) + psi1 = Kmn.T#kern.K(X, Z) + psi2 = None - log_marginal = 0.5*(-num_data * log_2_pi - W_logdet - np.sum(alpha * mu_tilde)) # TODO: add log Z_hat?? + #see whether we're using variational uncertain inputs - dL_dK = 0.5 * (tdot(alpha[:,None]) - Wi) + _, output_dim = Y.shape + + #see whether we've got a different noise variance for each datum + #beta = 1./np.fmax(likelihood.gaussian_variance(Y_metadata), 1e-6) + beta = tau_tilde + VVT_factor = beta[:,None]*mu_tilde[:,None] + trYYT = self.get_trYYT(mu_tilde[:,None]) + + # do the inference: + het_noise = beta.size > 1 + num_inducing = Z.shape[0] + num_data = Y.shape[0] + # kernel computations, using BGPLVM notation + + Kmm = kern.K(Z).copy() + diag.add(Kmm, self.const_jitter) + Lm = jitchol(Kmm) + + # The rather complex computations of A + if uncertain_inputs: + if het_noise: + psi2_beta = psi2 * (beta.flatten().reshape(num_data, 1, 1)).sum(0) + else: + psi2_beta = psi2.sum(0) * beta + LmInv = dtrtri(Lm) + A = LmInv.dot(psi2_beta.dot(LmInv.T)) + else: + if het_noise: + tmp = psi1 * (np.sqrt(beta.reshape(num_data, 1))) + else: + tmp = psi1 * (np.sqrt(beta)) + tmp, _ = dtrtrs(Lm, tmp.T, lower=1) + A = tdot(tmp) #print A.sum() + + # factor B + B = np.eye(num_inducing) + A + LB = jitchol(B) + psi1Vf = np.dot(psi1.T, VVT_factor) + # back substutue C into psi1Vf + tmp, _ = dtrtrs(Lm, psi1Vf, lower=1, trans=0) + _LBi_Lmi_psi1Vf, _ = dtrtrs(LB, tmp, lower=1, trans=0) + tmp, _ = dtrtrs(LB, _LBi_Lmi_psi1Vf, lower=1, trans=1) + Cpsi1Vf, _ = dtrtrs(Lm, tmp, lower=1, trans=1) + + # data fit and derivative of L w.r.t. Kmm + delit = tdot(_LBi_Lmi_psi1Vf) + data_fit = np.trace(delit) + DBi_plus_BiPBi = backsub_both_sides(LB, output_dim * np.eye(num_inducing) + delit) + delit = -0.5 * DBi_plus_BiPBi + delit += -0.5 * B * output_dim + delit += output_dim * np.eye(num_inducing) + # Compute dL_dKmm + dL_dKmm = backsub_both_sides(Lm, delit) + + # derivatives of L w.r.t. psi + dL_dpsi0, dL_dpsi1, dL_dpsi2 = _compute_dL_dpsi(num_inducing, num_data, output_dim, beta, Lm, + VVT_factor, Cpsi1Vf, DBi_plus_BiPBi, + psi1, het_noise, uncertain_inputs) + + # log marginal likelihood + log_marginal = _compute_log_marginal_likelihood(likelihood, num_data, output_dim, beta, het_noise, + psi0, A, LB, trYYT, data_fit, VVT_factor) + + #put the gradients in the right places + dL_dR = _compute_dL_dR(likelihood, + het_noise, uncertain_inputs, LB, + _LBi_Lmi_psi1Vf, DBi_plus_BiPBi, Lm, A, + psi0, psi1, beta, + data_fit, num_data, output_dim, trYYT, mu_tilde[:,None]) + + dL_dthetaL = 0#likelihood.exact_inference_gradients(dL_dR,Y_metadata) + + if uncertain_inputs: + grad_dict = {'dL_dKmm': dL_dKmm, + 'dL_dpsi0':dL_dpsi0, + 'dL_dpsi1':dL_dpsi1, + 'dL_dpsi2':dL_dpsi2, + 'dL_dthetaL':dL_dthetaL} + else: + grad_dict = {'dL_dKmm': dL_dKmm, + 'dL_dKdiag':dL_dpsi0, + 'dL_dKnm':dL_dpsi1, + 'dL_dthetaL':dL_dthetaL} + + #get sufficient things for posterior prediction + #TODO: do we really want to do this in the loop? + if VVT_factor.shape[1] == Y.shape[1]: + woodbury_vector = Cpsi1Vf # == Cpsi1V + else: + print 'foobar' + psi1V = np.dot(mu_tilde[:,None].T*beta, psi1).T + tmp, _ = dtrtrs(Lm, psi1V, lower=1, trans=0) + tmp, _ = dpotrs(LB, tmp, lower=1) + woodbury_vector, _ = dtrtrs(Lm, tmp, lower=1, trans=1) + Bi, _ = dpotri(LB, lower=1) + symmetrify(Bi) + Bi = -dpotri(LB, lower=1)[0] + diag.add(Bi, 1) + + woodbury_inv = backsub_both_sides(Lm, Bi) + + #construct a posterior object + post = Posterior(woodbury_inv=woodbury_inv, woodbury_vector=woodbury_vector, K=Kmm, mean=None, cov=None, K_chol=Lm) + return post, log_marginal, grad_dict - dL_dthetaL = np.zeros(likelihood.size)#TODO: derivatives of the likelihood parameters - return Posterior(woodbury_inv=Wi, woodbury_vector=alpha, K=K), log_marginal, {'dL_dK':dL_dK, 'dL_dthetaL':dL_dthetaL} @@ -129,3 +274,69 @@ class EPDTC(EP): mu_tilde = v_tilde/tau_tilde return mu, Sigma, mu_tilde, tau_tilde, Z_hat + +def _compute_dL_dpsi(num_inducing, num_data, output_dim, beta, Lm, VVT_factor, Cpsi1Vf, DBi_plus_BiPBi, psi1, het_noise, uncertain_inputs): + dL_dpsi0 = -0.5 * output_dim * (beta[:,None] * np.ones([num_data, 1])).flatten() + dL_dpsi1 = np.dot(VVT_factor, Cpsi1Vf.T) + dL_dpsi2_beta = 0.5 * backsub_both_sides(Lm, output_dim * np.eye(num_inducing) - DBi_plus_BiPBi) + if het_noise: + if uncertain_inputs: + dL_dpsi2 = beta[:, None, None] * dL_dpsi2_beta[None, :, :] + else: + dL_dpsi1 += 2.*np.dot(dL_dpsi2_beta, (psi1 * beta.reshape(num_data, 1)).T).T + dL_dpsi2 = None + else: + dL_dpsi2 = beta * dL_dpsi2_beta + if uncertain_inputs: + # repeat for each of the N psi_2 matrices + dL_dpsi2 = np.repeat(dL_dpsi2[None, :, :], num_data, axis=0) + else: + # subsume back into psi1 (==Kmn) + dL_dpsi1 += 2.*np.dot(psi1, dL_dpsi2) + dL_dpsi2 = None + + return dL_dpsi0, dL_dpsi1, dL_dpsi2 + + +def _compute_dL_dR(likelihood, het_noise, uncertain_inputs, LB, _LBi_Lmi_psi1Vf, DBi_plus_BiPBi, Lm, A, psi0, psi1, beta, data_fit, num_data, output_dim, trYYT, Y): + # the partial derivative vector for the likelihood + if likelihood.size == 0: + # save computation here. + dL_dR = None + elif het_noise: + if uncertain_inputs: + raise NotImplementedError, "heteroscedatic derivates with uncertain inputs not implemented" + else: + #from ...util.linalg import chol_inv + #LBi = chol_inv(LB) + LBi, _ = dtrtrs(LB,np.eye(LB.shape[0])) + + Lmi_psi1, nil = dtrtrs(Lm, psi1.T, lower=1, trans=0) + _LBi_Lmi_psi1, _ = dtrtrs(LB, Lmi_psi1, lower=1, trans=0) + + dL_dR = -0.5 * beta + 0.5 * (beta*Y)**2 + dL_dR += 0.5 * output_dim * (psi0 - np.sum(Lmi_psi1**2,0))[:,None] * beta**2 + + dL_dR += 0.5*np.sum(mdot(LBi.T,LBi,Lmi_psi1)*Lmi_psi1,0)[:,None]*beta**2 + + dL_dR += -np.dot(_LBi_Lmi_psi1Vf.T,_LBi_Lmi_psi1).T * Y * beta**2 + dL_dR += 0.5*np.dot(_LBi_Lmi_psi1Vf.T,_LBi_Lmi_psi1).T**2 * beta**2 + else: + # likelihood is not heteroscedatic + dL_dR = -0.5 * num_data * output_dim * beta + 0.5 * trYYT * beta ** 2 + dL_dR += 0.5 * output_dim * (psi0.sum() * beta ** 2 - np.trace(A) * beta) + dL_dR += beta * (0.5 * np.sum(A * DBi_plus_BiPBi) - data_fit) + return dL_dR + +def _compute_log_marginal_likelihood(likelihood, num_data, output_dim, beta, het_noise, psi0, A, LB, trYYT, data_fit,Y): + #compute log marginal likelihood + if het_noise: + lik_1 = -0.5 * num_data * output_dim * np.log(2. * np.pi) + 0.5 * np.sum(np.log(beta)) - 0.5 * np.sum(beta * np.square(Y).sum(axis=-1)) + lik_2 = -0.5 * output_dim * (np.sum(beta.flatten() * psi0) - np.trace(A)) + else: + lik_1 = -0.5 * num_data * output_dim * (np.log(2. * np.pi) - np.log(beta)) - 0.5 * beta * trYYT + lik_2 = -0.5 * output_dim * (np.sum(beta * psi0) - np.trace(A)) + lik_3 = -output_dim * (np.sum(np.log(np.diag(LB)))) + lik_4 = 0.5 * data_fit + log_marginal = lik_1 + lik_2 + lik_3 + lik_4 + return log_marginal diff --git a/GPy/likelihoods/bernoulli.py b/GPy/likelihoods/bernoulli.py index 7b867954..6c53958b 100644 --- a/GPy/likelihoods/bernoulli.py +++ b/GPy/likelihoods/bernoulli.py @@ -227,3 +227,6 @@ class Bernoulli(Likelihood): ns = np.ones_like(gp, dtype=int) Ysim = np.random.binomial(ns, self.gp_link.transf(gp)) return Ysim.reshape(orig_shape) + + def exact_inference_gradients(self, dL_dKdiag,Y_metadata=None): + pass