diff --git a/GPy/core/fitc.py b/GPy/core/fitc.py
index eac00fec..643379f0 100644
--- a/GPy/core/fitc.py
+++ b/GPy/core/fitc.py
@@ -29,7 +29,7 @@ class FITC(SparseGP):
         SparseGP.__init__(self, X, likelihood, kernel, Z, X_variance=None, normalize_X=False)
         assert self.output_dim == 1, "FITC model is not defined for handling multiple outputs"

-    def update_likelihood_approximation(self):
+    def update_likelihood_approximation(self, **kwargs):
         """
         Approximates a non-Gaussian likelihood using Expectation Propagation

@@ -37,7 +37,7 @@ class FITC(SparseGP):
         this function does nothing
         """
         self.likelihood.restart()
-        self.likelihood.fit_FITC(self.Kmm,self.psi1,self.psi0)
+        self.likelihood.fit_FITC(self.Kmm, self.psi1, self.psi0, **kwargs)
         self._set_params(self._get_params())

     def _compute_kernel_matrices(self):
diff --git a/GPy/core/gp.py b/GPy/core/gp.py
index 63903242..2d826ac2 100644
--- a/GPy/core/gp.py
+++ b/GPy/core/gp.py
@@ -62,7 +62,7 @@ class GP(GPBase):
     def _get_param_names(self):
         return self.kern._get_param_names_transformed() + self.likelihood._get_param_names()

-    def update_likelihood_approximation(self):
+    def update_likelihood_approximation(self, **kwargs):
         """
         Approximates a non-gaussian likelihood using Expectation Propagation

@@ -70,7 +70,7 @@ class GP(GPBase):
         this function does nothing
         """
         self.likelihood.restart()
-        self.likelihood.fit_full(self.kern.K(self.X))
+        self.likelihood.fit_full(self.kern.K(self.X), **kwargs)
         self._set_params(self._get_params()) # update the GP

     def _model_fit_term(self):
diff --git a/GPy/core/model.py b/GPy/core/model.py
index 89150b3a..daec00b6 100644
--- a/GPy/core/model.py
+++ b/GPy/core/model.py
@@ -538,22 +538,17 @@ class Model(Parameterized):

         return k.variances

-    def pseudo_EM(self, epsilon=.1, **kwargs):
+    def pseudo_EM(self, stop_crit=.1, **kwargs):
         """
-        TODO: Should this not bein the GP class?
         EM - like algorithm for Expectation Propagation and Laplace approximation

-        kwargs are passed to the optimize function. They can be:
+        :param stop_crit: convergence criterion
+        :type stop_crit: float

-        :epsilon: convergence criterion
-        :max_f_eval: maximum number of function evaluations
-        :messages: whether to display during optimisation
-        :param optimzer: whice optimizer to use (defaults to self.preferred optimizer)
-        :type optimzer: string TODO: valid strings?
-
-        """
+        .. Note:: remaining kwargs are passed on to the update_likelihood_approximation and optimize functions.
+        """
         assert isinstance(self.likelihood, likelihoods.EP) or isinstance(self.likelihood, likelihoods.EP_Mixed_Noise), "pseudo_EM is only available for EP likelihoods"
-        ll_change = epsilon + 1.
+        ll_change = stop_crit + 1.
         iteration = 0
         last_ll = -np.inf
@@ -561,10 +556,16 @@ class Model(Parameterized):
         alpha = 0
         stop = False

+        # Split the EP-specific arguments out of **kwargs; the rest are passed to optimize()
+        ep_args = {}
+        for arg in ('epsilon', 'power_ep'):
+            if arg in kwargs:
+                ep_args[arg] = kwargs.pop(arg)
+
         while not stop:
             last_approximation = self.likelihood.copy()
             last_params = self._get_params()
-            self.update_likelihood_approximation()
+            self.update_likelihood_approximation(**ep_args)
             new_ll = self.log_likelihood()
             ll_change = new_ll - last_ll

@@ -576,7 +577,7 @@ class Model(Parameterized):
             else:
                 self.optimize(**kwargs)
             last_ll = self.log_likelihood()
-            if ll_change < epsilon:
+            if ll_change < stop_crit:
                 stop = True
             iteration += 1
         if stop:
diff --git a/GPy/core/sparse_gp.py b/GPy/core/sparse_gp.py
index cb96b478..ede9f92d 100644
--- a/GPy/core/sparse_gp.py
+++ b/GPy/core/sparse_gp.py
@@ -215,7 +215,7 @@ class SparseGP(GPBase):
     #def _get_print_names(self):
     #    return self.kern._get_param_names_transformed() + self.likelihood._get_param_names()

-    def update_likelihood_approximation(self):
+    def update_likelihood_approximation(self, **kwargs):
         """
         Approximates a non-gaussian likelihood using Expectation Propagation

@@ -229,10 +229,10 @@ class SparseGP(GPBase):
             Kmmi = tdot(Lmi.T)
             diag_tr_psi2Kmmi = np.array([np.trace(psi2_Kmmi) for psi2_Kmmi in np.dot(self.psi2, Kmmi)])

-            self.likelihood.fit_FITC(self.Kmm, self.psi1.T, diag_tr_psi2Kmmi) # This uses the fit_FITC code, but does not perfomr a FITC-EP.#TODO solve potential confusion
+            self.likelihood.fit_FITC(self.Kmm, self.psi1.T, diag_tr_psi2Kmmi, **kwargs) # This uses the fit_FITC code, but does not perform a FITC-EP. #TODO solve potential confusion
             # raise NotImplementedError, "EP approximation not implemented for uncertain inputs"
         else:
-            self.likelihood.fit_DTC(self.Kmm, self.psi1.T)
+            self.likelihood.fit_DTC(self.Kmm, self.psi1.T, **kwargs)
             # self.likelihood.fit_FITC(self.Kmm,self.psi1,self.psi0)

         self._set_params(self._get_params()) # update the GP
diff --git a/GPy/likelihoods/ep.py b/GPy/likelihoods/ep.py
index 9a816e65..67bb79fc 100644
--- a/GPy/likelihoods/ep.py
+++ b/GPy/likelihoods/ep.py
@@ -4,18 +4,17 @@ from ..util.linalg import pdinv,mdot,jitchol,chol_inv,DSYR,tdot,dtrtrs
 from likelihood import likelihood

 class EP(likelihood):
-    def __init__(self,data,noise_model,epsilon=1e-3,power_ep=[1.,1.]):
+    def __init__(self, data, noise_model):
         """
         Expectation Propagation

-        Arguments
-        ---------
-        epsilon : Convergence criterion, maximum squared difference allowed between mean updates to stop iterations (float)
-        noise_model : a likelihood function (see likelihood_functions.py)
+        :param data: data to model
+        :type data: numpy array
+        :param noise_model: noise distribution
+        :type noise_model: a GPy noise model
+
         """
         self.noise_model = noise_model
-        self.epsilon = epsilon
-        self.eta, self.delta = power_ep
         self.data = data
         self.N, self.output_dim = self.data.shape
         self.is_heteroscedastic = True
@@ -87,11 +86,19 @@ class EP(likelihood):
             self.VVT_factor = self.V
         self.trYYT = np.trace(self.YYT)

-    def fit_full(self,K):
+    def fit_full(self, K, epsilon=1e-3, power_ep=[1.,1.]):
         """
         The expectation-propagation algorithm.
         For nomenclature see Rasmussen & Williams 2006.
+
+        :param epsilon: Convergence criterion, maximum squared difference allowed between mean updates to stop iterations
+        :type epsilon: float
+        :param power_ep: Power EP parameters
+        :type power_ep: list of floats
         """
+        self.epsilon = epsilon
+        self.eta, self.delta = power_ep
+
         #Initial values - Posterior distribution parameters: q(f|X,Y) = N(f|mu,Sigma)
         mu = np.zeros(self.N)
         Sigma = K.copy()
@@ -149,11 +156,19 @@ class EP(likelihood):

         return self._compute_GP_variables()

-    def fit_DTC(self, Kmm, Kmn):
+    def fit_DTC(self, Kmm, Kmn, epsilon=1e-3, power_ep=[1.,1.]):
         """
         The expectation-propagation algorithm with sparse pseudo-input.
         For nomenclature see ... 2013.
+
+        :param epsilon: Convergence criterion, maximum squared difference allowed between mean updates to stop iterations
+        :type epsilon: float
+        :param power_ep: Power EP parameters
+        :type power_ep: list of floats
         """
+        self.epsilon = epsilon
+        self.eta, self.delta = power_ep
+
         num_inducing = Kmm.shape[0]

         #TODO: this doesn't work with uncertain inputs!
@@ -245,11 +260,19 @@ class EP(likelihood):

         self._compute_GP_variables()

-    def fit_FITC(self, Kmm, Kmn, Knn_diag):
+    def fit_FITC(self, Kmm, Kmn, Knn_diag, epsilon=1e-3, power_ep=[1.,1.]):
         """
         The expectation-propagation algorithm with sparse pseudo-input.
         For nomenclature see Naish-Guzman and Holden, 2008.
+
+        :param epsilon: Convergence criterion, maximum squared difference allowed between mean updates to stop iterations
+        :type epsilon: float
+        :param power_ep: Power EP parameters
+        :type power_ep: list of floats
         """
+        self.epsilon = epsilon
+        self.eta, self.delta = power_ep
+
         num_inducing = Kmm.shape[0]

         """
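
A minimal usage sketch of the keyword routing this patch introduces. These are standalone stand-in functions, not GPy code: the real entry points are Model.pseudo_EM, the update_likelihood_approximation methods, and EP.fit_full/fit_DTC/fit_FITC. EP-specific arguments (epsilon, power_ep) are split out of **kwargs and forwarded to the likelihood fit; whatever remains reaches the optimizer:

    # Stand-in functions that mirror only the **kwargs routing added by this
    # patch; the bodies just report what they receive.

    def update_likelihood_approximation(**ep_args):
        # In GPy these kwargs reach EP.fit_full/fit_DTC/fit_FITC, which set
        # self.epsilon and self.eta, self.delta = power_ep before iterating.
        print("EP fit called with %r" % ep_args)

    def optimize(**opt_args):
        # Stands in for Model.optimize (optimizer settings such as messages,
        # max_f_eval).
        print("optimize called with %r" % opt_args)

    def pseudo_EM(stop_crit=.1, **kwargs):
        # Same splitting as in Model.pseudo_EM: EP arguments are removed from
        # **kwargs before the remainder is handed to the optimizer.
        ep_args = {}
        for arg in ('epsilon', 'power_ep'):
            if arg in kwargs:
                ep_args[arg] = kwargs.pop(arg)
        update_likelihood_approximation(**ep_args)  # gets epsilon, power_ep
        optimize(**kwargs)                          # gets everything else

    pseudo_EM(stop_crit=.05, epsilon=1e-4, power_ep=[1., 1.], messages=True)

With this in place, a call like m.pseudo_EM(epsilon=1e-4, messages=True) tightens the EP convergence criterion for each fit while still enabling optimizer messages; previously epsilon and power_ep were fixed at EP construction time and pseudo_EM passed its kwargs to optimize only.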