diff --git a/GPy/likelihoods/ep.py b/GPy/inference/ep.py
similarity index 94%
rename from GPy/likelihoods/ep.py
rename to GPy/inference/ep.py
index 4fedd66b..aa106067 100644
--- a/GPy/likelihoods/ep.py
+++ b/GPy/inference/ep.py
@@ -19,7 +19,6 @@ class EP(likelihood):
         self.num_data, self.output_dim = self.data.shape
         self.is_heteroscedastic = True
         self.num_params = 0
-        self._transf_data = self.noise_model._preprocess_values(data)
 
         #Initial values - Likelihood approximation parameters:
         #p(y|f) = t(f|tau_tilde,v_tilde)
@@ -50,10 +49,26 @@ class EP(likelihood):
         self.VVT_factor = self.V
         self.trYYT = 0.
 
-    def predictive_values(self,mu,var,full_cov):
+    def predictive_values(self,mu,var,full_cov,**noise_args):
         if full_cov:
             raise NotImplementedError, "Cannot make correlated predictions with an EP likelihood"
-        return self.noise_model.predictive_values(mu,var)
+        return self.noise_model.predictive_values(mu,var,**noise_args)
+
+    def log_predictive_density(self, y_test, mu_star, var_star):
+        """
+        Calculation of the log predictive density
+
+        .. math::
+            p(y_{*}|D) = \int p(y_{*}|f_{*})p(f_{*}|\mu_{*},\sigma^{2}_{*}) df_{*}
+
+        :param y_test: test observations (y_{*})
+        :type y_test: (Nx1) array
+        :param mu_star: predictive mean of gaussian p(f_{*}|mu_{*}, var_{*})
+        :type mu_star: (Nx1) array
+        :param var_star: predictive variance of gaussian p(f_{*}|mu_{*}, var_{*})
+        :type var_star: (Nx1) array
+        """
+        return self.noise_model.log_predictive_density(y_test, mu_star, var_star)
 
     def _get_params(self):
         #return np.zeros(0)
@@ -134,7 +149,7 @@ class EP(likelihood):
             self.tau_[i] = 1./Sigma[i,i] - self.eta*self.tau_tilde[i]
             self.v_[i] = mu[i]/Sigma[i,i] - self.eta*self.v_tilde[i]
             #Marginal moments
-            self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.noise_model.moments_match(self._transf_data[i],self.tau_[i],self.v_[i])
+            self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.noise_model.moments_match(self.data[i],self.tau_[i],self.v_[i])
             #Site parameters update
             Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma[i,i])
             Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma[i,i])
@@ -233,7 +248,7 @@ class EP(likelihood):
             self.tau_[i] = 1./Sigma_diag[i] - self.eta*self.tau_tilde[i]
             self.v_[i] = mu[i]/Sigma_diag[i] - self.eta*self.v_tilde[i]
             #Marginal moments
-            self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.noise_model.moments_match(self._transf_data[i],self.tau_[i],self.v_[i])
+            self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.noise_model.moments_match(self.data[i],self.tau_[i],self.v_[i])
             #Site parameters update
             Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i])
             Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i])
@@ -336,7 +351,7 @@ class EP(likelihood):
             self.tau_[i] = 1./Sigma_diag[i] - self.eta*self.tau_tilde[i]
             self.v_[i] = mu[i]/Sigma_diag[i] - self.eta*self.v_tilde[i]
             #Marginal moments
-            self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.noise_model.moments_match(self._transf_data[i],self.tau_[i],self.v_[i])
+            self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.noise_model.moments_match(self.data[i],self.tau_[i],self.v_[i])
             #Site parameters update
             Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i])
             Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i])
diff --git a/GPy/inference/laplace.py b/GPy/inference/laplace.py
new file mode 100644
index 00000000..0def0c8b
--- /dev/null
+++ b/GPy/inference/laplace.py
@@ -0,0 +1,403 @@
+# Copyright (c) 2013, GPy authors (see AUTHORS.txt).
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+#
+#Parts of this file were influenced by the Matlab GPML framework written by
+#Carl Edward Rasmussen & Hannes Nickisch, however all bugs are our own.
+#
+#The GPML code is released under the FreeBSD License.
+#Copyright (c) 2005-2013 Carl Edward Rasmussen & Hannes Nickisch. All rights reserved.
+#
+#The code and associated documentation is available from
+#http://gaussianprocess.org/gpml/code.
+
+import numpy as np
+import scipy as sp
+from likelihood import likelihood
+from ..util.linalg import mdot, jitchol, pddet, dpotrs
+from functools import partial as partial_func
+import warnings
+
+class Laplace(likelihood):
+    """Laplace approximation to a posterior"""
+
+    def __init__(self, data, noise_model, extra_data=None):
+        """
+        Laplace Approximation
+
+        Find the mode \hat{f} of the unnormalised posterior, and the hessian
+        at this point (using Newton-Raphson).
+
+        Compute the GP variables (i.e. generate some Y^{\\tilde} and
+        Z^{\\tilde} which make a gaussian the same as the laplace
+        approximation to the posterior, but normalised).
+
+        Arguments
+        ---------
+
+        :param data: array of data the likelihood function is approximating
+        :type data: NxD
+        :param noise_model: likelihood function - subclass of noise_model
+        :type noise_model: noise_model
+        :param extra_data: additional data used by some likelihood functions
+        """
+        self.data = data
+        self.noise_model = noise_model
+        self.extra_data = extra_data
+
+        #Initial values
+        self.N, self.D = self.data.shape
+        self.is_heteroscedastic = True
+        self.Nparams = 0
+        self.NORMAL_CONST = ((0.5 * self.N) * np.log(2 * np.pi))
+
+        self.restart()
+        likelihood.__init__(self)
+
+    def restart(self):
+        """
+        Reset likelihood variables to their defaults
+        """
+        #Initial values for the GP variables
+        self.Y = np.zeros((self.N, 1))
+        self.covariance_matrix = np.eye(self.N)
+        self.precision = np.ones(self.N)[:, None]
+        self.Z = 0
+        self.YYT = None
+
+        self.old_Ki_f = None
+        self.bad_fhat = False
+
+    def predictive_values(self,mu,var,full_cov,**noise_args):
+        if full_cov:
+            raise NotImplementedError, "Cannot make correlated predictions with a Laplace likelihood"
+        return self.noise_model.predictive_values(mu,var,**noise_args)
+
+    def log_predictive_density(self, y_test, mu_star, var_star):
+        """
+        Calculation of the log predictive density
+
+        .. math::
+            p(y_{*}|D) = \int p(y_{*}|f_{*})p(f_{*}|\mu_{*},\sigma^{2}_{*}) df_{*}
+
+        :param y_test: test observations (y_{*})
+        :type y_test: (Nx1) array
+        :param mu_star: predictive mean of gaussian p(f_{*}|mu_{*}, var_{*})
+        :type mu_star: (Nx1) array
+        :param var_star: predictive variance of gaussian p(f_{*}|mu_{*}, var_{*})
+        :type var_star: (Nx1) array
+        """
+        return self.noise_model.log_predictive_density(y_test, mu_star, var_star)
+
+    def _get_params(self):
+        return np.asarray(self.noise_model._get_params())
+
+    def _get_param_names(self):
+        return self.noise_model._get_param_names()
+
+    def _set_params(self, p):
+        return self.noise_model._set_params(p)
+
+    def _shared_gradients_components(self):
+        d3lik_d3fhat = self.noise_model.d3logpdf_df3(self.f_hat, self.data, extra_data=self.extra_data)
+        dL_dfhat = 0.5*(np.diag(self.Ki_W_i)[:, None]*d3lik_d3fhat).T #why isn't this -0.5?
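+        #(the -0.5 from differentiating the log determinant meets a second minus
+        # from dW/df = -d3logpdf_df3, since W = -d2logpdf_df2, which appears to give
+        # +0.5*diag((K^{-1} + W)^{-1})*d3logpdf_df3; cf. Rasmussen & Williams 2006, eq. 5.23)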
+        I_KW_i = np.eye(self.N) - np.dot(self.K, self.Wi_K_i)
+        return dL_dfhat, I_KW_i
+
+    def _Kgradients(self):
+        """
+        Gradients with respect to prior kernel parameters dL_dK to be chained
+        with dK_dthetaK to give dL_dthetaK
+        :returns: dL_dK matrix
+        :rtype: NxN matrix
+        """
+        dL_dfhat, I_KW_i = self._shared_gradients_components()
+        dlp = self.noise_model.dlogpdf_df(self.f_hat, self.data, extra_data=self.extra_data)
+
+        #Explicit
+        #expl_a = np.dot(self.Ki_f, self.Ki_f.T)
+        #expl_b = self.Wi_K_i
+        #expl = 0.5*expl_a - 0.5*expl_b
+        #dL_dthetaK_exp = dK_dthetaK(expl, X)
+
+        #Implicit
+        impl = mdot(dlp, dL_dfhat, I_KW_i)
+
+        #No longer required as we are computing these in the gp already
+        #otherwise we would take them away and add them back
+        #dL_dthetaK_imp = dK_dthetaK(impl, X)
+        #dL_dthetaK = dL_dthetaK_exp + dL_dthetaK_imp
+        #dL_dK = expl + impl
+
+        #No need to compute the explicit part as we are computing dZ_dK to account
+        #for the difference between the K gradients of a normal GP,
+        #and the K gradients including the implicit part
+        dL_dK = impl
+        return dL_dK
+
+    def _gradients(self, partial):
+        """
+        Gradients with respect to likelihood parameters (dL_dthetaL)
+
+        :param partial: Not needed by this likelihood
+        :type partial: lambda function
+        :rtype: array of derivatives (1 x num_likelihood_params)
+        """
+        dL_dfhat, I_KW_i = self._shared_gradients_components()
+        dlik_dthetaL, dlik_grad_dthetaL, dlik_hess_dthetaL = self.noise_model._laplace_gradients(self.f_hat, self.data, extra_data=self.extra_data)
+
+        #len(dlik_dthetaL)
+        num_params = len(self._get_param_names())
+        # make space for one derivative for each likelihood parameter
+        dL_dthetaL = np.zeros(num_params)
+        for thetaL_i in range(num_params):
+            #Explicit
+            dL_dthetaL_exp = ( np.sum(dlik_dthetaL[:, thetaL_i])
+                              #- 0.5*np.trace(mdot(self.Ki_W_i, (self.K, np.diagflat(dlik_hess_dthetaL[thetaL_i]))))
+                              + np.dot(0.5*np.diag(self.Ki_W_i)[:,None].T, dlik_hess_dthetaL[:, thetaL_i])
+                             )
+
+            #Implicit
+            dfhat_dthetaL = mdot(I_KW_i, self.K, dlik_grad_dthetaL[:, thetaL_i])
+            dL_dthetaL_imp = np.dot(dL_dfhat, dfhat_dthetaL)
+            dL_dthetaL[thetaL_i] = dL_dthetaL_exp + dL_dthetaL_imp
+
+        return dL_dthetaL
+
+    def _compute_GP_variables(self):
+        """
+        Generate data Y which would give the normal distribution identical
+        to the laplace approximation to the posterior, but normalised
+
+        GPy expects a likelihood to be gaussian, so we need to calculate
+        the data Y^{\\tilde} that makes the posterior match that found
+        by a laplace approximation to a non-gaussian likelihood, but with
+        a gaussian likelihood
+
+        Firstly,
+        the hessian of the unnormalised posterior distribution is (K^{-1} + W)^{-1},
+        i.e. z*N(f|f^{\hat}, (K^{-1} + W)^{-1}), but this assumes a non-gaussian likelihood;
+        we wish to find the covariance \Sigma^{\\tilde}
+        that has the same curvature but uses our new simulated data Y^{\\tilde},
+        i.e. we do N(Y^{\\tilde}|f^{\hat}, \Sigma^{\\tilde})N(f|0, K) = z*N(f|f^{\hat}, (K^{-1} + W)^{-1})
+        and we wish to find what Y^{\\tilde} and \Sigma^{\\tilde} are.
+        We find that Y^{\\tilde} = W^{-1}(K^{-1} + W)f^{\hat} and \Sigma^{\\tilde} = W^{-1}
+
+        Secondly,
+        GPy optimizes the log marginal likelihood
+        log p(y) = -0.5*ln|K + \Sigma^{\\tilde}| - 0.5*Y^{\\tilde T}(K + \Sigma^{\\tilde})^{-1}Y^{\\tilde} + lik.Z
+        So we can suck up any differences between that and our log marginal likelihood approximation
+        p^{\\tilde}(y) = -0.5*f^{\hat T}K^{-1}f^{\hat} + log p(y|f^{\hat}) - 0.5*log(|K||K^{-1} + W|)
+        which we want to optimize instead: by equating them and rearranging, the difference is added onto
+        the log p(y) that GPy optimizes by default
+
+        Thirdly,
+        since we have gradients that depend on how we move f^{\hat}, we have implicit components
+        as well as the explicit dL_dK; we hold these differences in dZ_dK and add them to dL_dK in the
+        gp.py code
+        """
+        Wi = 1.0/self.W
+        self.Sigma_tilde = np.diagflat(Wi)
+
+        Y_tilde = Wi*self.Ki_f + self.f_hat
+
+        self.Wi_K_i = self.W12BiW12
+        ln_det_Wi_K = pddet(self.Sigma_tilde + self.K)
+        lik = self.noise_model.logpdf(self.f_hat, self.data, extra_data=self.extra_data)
+        y_Wi_K_i_y = mdot(Y_tilde.T, self.Wi_K_i, Y_tilde)
+
+        Z_tilde = (+ lik
+                   - 0.5*self.ln_B_det
+                   + 0.5*ln_det_Wi_K
+                   - 0.5*self.f_Ki_f
+                   + 0.5*y_Wi_K_i_y
+                   + self.NORMAL_CONST
+                  )
+
+        #Convert to float as it's (1, 1) and Z must be a scalar
+        self.Z = np.float64(Z_tilde)
+        self.Y = Y_tilde
+        self.YYT = np.dot(self.Y, self.Y.T)
+        self.covariance_matrix = self.Sigma_tilde
+        self.precision = 1.0 / np.diag(self.covariance_matrix)[:, None]
+
+        #Compute dZ_dK, which is how the approximated distribution's gradients differ from the dL_dK computed for other likelihoods
+        self.dZ_dK = self._Kgradients()
+        #+ 0.5*self.Wi_K_i - 0.5*np.dot(self.Ki_f, self.Ki_f.T) #since we are not adding the K gradients' explicit part there's no need to compute this again
+
+    def fit_full(self, K):
+        """
+        The laplace approximation algorithm: find the mode and expand the hessian around it.
+        For nomenclature see Rasmussen & Williams 2006 - modified for numerical stability
+
+        :param K: Prior covariance matrix evaluated at locations X
+        :type K: NxN matrix
+        """
+        self.K = K.copy()
+
+        #Find the mode
+        self.f_hat = self.rasm_mode(self.K)
+
+        #Compute the hessian and other variables at the mode
+        self._compute_likelihood_variables()
+
+        #Compute fake variables replicating the laplace approximation to the posterior
+        self._compute_GP_variables()
+
+    def _compute_likelihood_variables(self):
+        """
+        Compute the variables required to compute gaussian Y variables
+        """
+        #At this point get the hessian matrix (or vector, as W is diagonal)
+        self.W = -self.noise_model.d2logpdf_df2(self.f_hat, self.data, extra_data=self.extra_data)
+
+        if not self.noise_model.log_concave:
+            #print "Under 1e-10: {}".format(np.sum(self.W < 1e-6))
+            self.W[self.W < 1e-6] = 1e-6 # FIXME-HACK: This is a hack since GPy can't handle negative variances which can occur
+
+        self.W12BiW12, self.ln_B_det = self._compute_B_statistics(self.K, self.W, np.eye(self.N))
+
+        self.Ki_f = self.Ki_f
+        self.f_Ki_f = np.dot(self.f_hat.T, self.Ki_f)
+        self.Ki_W_i = self.K - mdot(self.K, self.W12BiW12, self.K)
+
+    def _compute_B_statistics(self, K, W, a):
+        """
+        Rasmussen suggests the use of a numerically stable positive definite matrix B,
+        which has positive diagonal elements and can be easily inverted
+
+        :param K: Prior covariance matrix evaluated at locations X
+        :type K: NxN matrix
+        :param W: Negative hessian at a point (diagonal matrix)
+        :type W: Vector of diagonal values of hessian (1xN)
+        :param a: Matrix to calculate W12BiW12a
+        :type a: Matrix NxN
+        :returns: (W12BiW12a, ln_B_det)
+        """
+        if not self.noise_model.log_concave:
+            #print "Under 1e-10: {}".format(np.sum(W < 1e-6))
+            W[W < 1e-6] = 1e-6 # FIXME-HACK: This is a hack since GPy can't handle negative variances, which can occur
+                               # if the likelihood is non-log-concave. We want to allow a negative variance,
+                               # which causes the posterior to become less certain than the prior and likelihood;
+                               # this is a property only held by non-log-concave likelihoods
+
+        #W is diagonal so its sqrt is just the sqrt of the diagonal elements
+        W_12 = np.sqrt(W)
+        B = np.eye(self.N) + W_12*K*W_12.T
+        L = jitchol(B)
+
+        W12BiW12a = W_12*dpotrs(L, np.asfortranarray(W_12*a), lower=1)[0]
+        ln_B_det = 2*np.sum(np.log(np.diag(L)))
+        return W12BiW12a, ln_B_det
+
+    def rasm_mode(self, K, MAX_ITER=40):
+        """
+        Rasmussen's numerically stable mode finding.
+        For nomenclature see Rasmussen & Williams 2006.
+        Influenced by GPML (BSD) code, all errors are our own
+
+        :param K: Covariance matrix evaluated at locations X
+        :type K: NxN matrix
+        :param MAX_ITER: Maximum number of iterations of newton-raphson before forcing finish of optimisation
+        :type MAX_ITER: scalar
+        :returns: f_hat, mode on which to make the laplace approximation
+        :rtype: NxD matrix
+        """
+        #old_Ki_f = np.zeros((self.N, 1))
+
+        #Start f's at zero originally, or if we have gone off track, try restarting
+        if self.old_Ki_f is None or self.bad_fhat:
+            old_Ki_f = np.random.rand(self.N, 1)/50.0
+            #old_Ki_f = self.Y
+            f = np.dot(K, old_Ki_f)
+        else:
+            #Start at the old best point
+            old_Ki_f = self.old_Ki_f.copy()
+            f = self.f_hat.copy()
+
+        new_obj = -np.inf
+        old_obj = np.inf
+
+        def obj(Ki_f, f):
+            return -0.5*np.dot(Ki_f.T, f) + self.noise_model.logpdf(f, self.data, extra_data=self.extra_data)
+
+        difference = np.inf
+        epsilon = 1e-7
+        #step_size = 1
+        #rs = 0
+        i = 0
+
+        while difference > epsilon and i < MAX_ITER:
+            W = -self.noise_model.d2logpdf_df2(f, self.data, extra_data=self.extra_data)
+
+            W_f = W*f
+            grad = self.noise_model.dlogpdf_df(f, self.data, extra_data=self.extra_data)
+
+            b = W_f + grad
+            W12BiW12Kb, _ = self._compute_B_statistics(K, W.copy(), np.dot(K, b))
+
+            #Work out the DIRECTION that we want to move in, but don't choose the stepsize yet
+            full_step_Ki_f = b - W12BiW12Kb
+            dKi_f = full_step_Ki_f - old_Ki_f
+
+            f_old = f.copy()
+            def inner_obj(step_size, old_Ki_f, dKi_f, K):
+                Ki_f = old_Ki_f + step_size*dKi_f
+                f = np.dot(K, Ki_f)
+                # This is nasty, need to set something within an optimization though
+                self.tmp_Ki_f = Ki_f.copy()
+                self.tmp_f = f.copy()
+                return -obj(Ki_f, f)
+
+            i_o = partial_func(inner_obj, old_Ki_f=old_Ki_f, dKi_f=dKi_f, K=K)
+            #Find the stepsize that minimizes the objective function using a brent line search.
+            #The tolerance and maxiter matter for speed! It seems best to keep them low and take more full
+            #steps than to get this exact before making a step; if B were bigger it might be the other way around though
+            #new_obj = sp.optimize.minimize_scalar(i_o, method='brent', tol=1e-4, options={'maxiter':5}).fun
+            new_obj = sp.optimize.brent(i_o, tol=1e-4, maxiter=10)
+            f = self.tmp_f.copy()
+            Ki_f = self.tmp_Ki_f.copy()
+
+            #Optimize without linesearch
+            #f_old = f.copy()
+            #update_passed = False
+            #while not update_passed:
+                #Ki_f = old_Ki_f + step_size*dKi_f
+                #f = np.dot(K, Ki_f)
+
+                #old_obj = new_obj
+                #new_obj = obj(Ki_f, f)
+                #difference = new_obj - old_obj
+                ##print "difference: ",difference
+                #if difference < 0:
+                    ##print "Objective function rose", np.float(difference)
+                    ##If the objective function isn't rising, restart optimization
+                    #step_size *= 0.8
+                    ##print "Reducing step-size to {ss:.3} and restarting optimization".format(ss=step_size)
+                    ##objective function isn't increasing, try reducing step size
+                    #f = f_old.copy() #it's actually faster not to go back to old location and just zigzag across the mode
+                    #old_obj = new_obj
+                    #rs += 1
+                #else:
+                    #update_passed = True
+
+            #old_Ki_f = self.Ki_f.copy()
+
+            #difference = abs(new_obj - old_obj)
+            #old_obj = new_obj.copy()
+            difference = np.abs(np.sum(f - f_old)) + np.abs(np.sum(Ki_f - old_Ki_f))
+            #difference = np.abs(np.sum(Ki_f - old_Ki_f))/np.float(self.N)
+            old_Ki_f = Ki_f.copy()
+            i += 1
+
+        self.old_Ki_f = old_Ki_f.copy()
+
+        #Warn of bad fits
+        if difference > epsilon:
+            self.bad_fhat = True
+            warnings.warn("f_hat fit is not perfect; difference: {}".format(difference))
+        elif self.bad_fhat:
+            self.bad_fhat = False
+            warnings.warn("f_hat fit has recovered")
+
+        self.Ki_f = Ki_f
+        return f
diff --git a/GPy/likelihoods/__init__.py b/GPy/likelihoods/__init__.py
deleted file mode 100644
index 0cb62eb0..00000000
--- a/GPy/likelihoods/__init__.py
+++ /dev/null
@@ -1,7 +0,0 @@
-from ep import EP
-from ep_mixed_noise import EP_Mixed_Noise
-from gaussian import Gaussian
-from gaussian_mixed_noise import Gaussian_Mixed_Noise
-from noise_model_constructors import *
-# TODO: from Laplace import Laplace
-
diff --git a/GPy/likelihoods/ep_mixed_noise.py b/GPy/likelihoods/ep_mixed_noise.py
deleted file mode 100644
index f5452512..00000000
--- a/GPy/likelihoods/ep_mixed_noise.py
+++ /dev/null
@@ -1,385 +0,0 @@
-# Copyright (c) 2013, Ricardo Andrade
-# Licensed under the BSD 3-clause license (see LICENSE.txt)
-
-
-import numpy as np
-from scipy import stats
-from ..util.linalg import pdinv,mdot,jitchol,chol_inv,DSYR,tdot,dtrtrs
-from likelihood import likelihood
-
-class EP_Mixed_Noise(likelihood):
-    def __init__(self,data_list,noise_model_list,epsilon=1e-3,power_ep=[1.,1.]):
-        """
-        Expectation Propagation
-
-        Arguments
-        ---------
-        :param data_list: list of outputs
-        :param noise_model_list: a list of noise models
-        :param epsilon: Convergence criterion, maximum squared difference allowed between mean updates to stop iterations
-        :type epsilon: float
-        :param power_ep: list of power ep parameters
-        """
-        assert len(data_list) == len(noise_model_list)
-        self.noise_model_list = noise_model_list
-        n_list = [data.size for data in data_list]
-        self.n_models = len(data_list)
-        self.n_params = [noise_model._get_params().size for noise_model in noise_model_list]
-        self.index = np.vstack([np.repeat(i,n)[:,None] for i,n in zip(range(self.n_models),n_list)])
-        self.epsilon = epsilon
-        self.eta, self.delta = power_ep
-        self.data = np.vstack(data_list)
-        self.N, self.output_dim = self.data.shape
-        self.is_heteroscedastic = True
-        self.num_params = 0 #FIXME
-        self._transf_data = np.vstack([noise_model._preprocess_values(data) for noise_model,data in zip(noise_model_list,data_list)])
-        #TODO non-gaussian index
-
-        #Initial values - Likelihood approximation parameters:
-        #p(y|f) = t(f|tau_tilde,v_tilde)
-        self.tau_tilde = np.zeros(self.N)
-        self.v_tilde = np.zeros(self.N)
-
-        #initial values for the GP variables
-        self.Y = np.zeros((self.N,1))
-        self.covariance_matrix = np.eye(self.N)
-        self.precision = np.ones(self.N)[:,None]
-        self.Z = 0
-        self.YYT = None
-        self.V = self.precision * self.Y
-        self.VVT_factor = self.V
-        self.trYYT = 0.
-
-    def restart(self):
-        self.tau_tilde = np.zeros(self.N)
-        self.v_tilde = np.zeros(self.N)
-        self.Y = np.zeros((self.N,1))
-        self.covariance_matrix = np.eye(self.N)
-        self.precision = np.ones(self.N)[:,None]
-        self.Z = 0
-        self.YYT = None
-        self.V = self.precision * self.Y
-        self.VVT_factor = self.V
-        self.trYYT = 0.
-
-    def predictive_values(self,mu,var,full_cov,noise_model):
-        """
-        Predicts the output given the GP
-
-        :param mu: GP's mean
-        :param var: GP's variance
-        :param full_cov: whether to return the full covariance matrix, or just the diagonal
-        :type full_cov: False|True
-        :param noise_model: noise model to use
-        :type noise_model: integer
-        """
-        if full_cov:
-            raise NotImplementedError, "Cannot make correlated predictions with an EP likelihood"
-        #_mu = []
-        #_var = []
-        #_q1 = []
-        #_q2 = []
-        #for m,v,o in zip(mu,var,output.flatten()):
-        #    a,b,c,d = self.noise_model_list[int(o)].predictive_values(m,v)
-        #    _mu.append(a)
-        #    _var.append(b)
-        #    _q1.append(c)
-        #    _q2.append(d)
-        #return np.vstack(_mu),np.vstack(_var),np.vstack(_q1),np.vstack(_q2)
-        return self.noise_model_list[noise_model].predictive_values(mu,var)
-
-    def _get_params(self):
-        return np.hstack([noise_model._get_params().flatten() for noise_model in self.noise_model_list])
-
-    def _get_param_names(self):
-        names = []
-        for noise_model in self.noise_model_list:
-            names += noise_model._get_param_names()
-        return names
-
-    def _set_params(self,p):
-        cs_params = np.cumsum([0]+self.n_params)
-        for i in range(len(self.n_params)):
-            self.noise_model_list[i]._set_params(p[cs_params[i]:cs_params[i+1]])
-
-    def _gradients(self,partial):
-        #NOTE this is not tested
-        return np.hstack([noise_model._gradients(partial) for noise_model in self.noise_model_list])
-
-    def _compute_GP_variables(self):
-        #Variables to be called from GP
-        mu_tilde = self.v_tilde/self.tau_tilde #When calling EP, this variable is used instead of Y in the GP model
-        sigma_sum = 1./self.tau_ + 1./self.tau_tilde
-        mu_diff_2 = (self.v_/self.tau_ - mu_tilde)**2
-        self.Z = np.sum(np.log(self.Z_hat)) + 0.5*np.sum(np.log(sigma_sum)) + 0.5*np.sum(mu_diff_2/sigma_sum) #Normalization constant, aka Z_ep
-
-        self.Y = mu_tilde[:,None]
-        self.YYT = np.dot(self.Y,self.Y.T)
-        self.covariance_matrix = np.diag(1./self.tau_tilde)
-        self.precision = self.tau_tilde[:,None]
-        self.V = self.precision * self.Y
-        self.VVT_factor = self.V
-        self.trYYT = np.trace(self.YYT)
-
-    def fit_full(self,K):
-        """
-        The expectation-propagation algorithm.
-        For nomenclature see Rasmussen & Williams 2006.
- """ - #Initial values - Posterior distribution parameters: q(f|X,Y) = N(f|mu,Sigma) - mu = np.zeros(self.N) - Sigma = K.copy() - - """ - Initial values - Cavity distribution parameters: - q_(f|mu_,sigma2_) = Product{q_i(f|mu_i,sigma2_i)} - sigma_ = 1./tau_ - mu_ = v_/tau_ - """ - self.tau_ = np.empty(self.N,dtype=float) - self.v_ = np.empty(self.N,dtype=float) - - #Initial values - Marginal moments - z = np.empty(self.N,dtype=float) - self.Z_hat = np.empty(self.N,dtype=float) - phi = np.empty(self.N,dtype=float) - mu_hat = np.empty(self.N,dtype=float) - sigma2_hat = np.empty(self.N,dtype=float) - - #Approximation - epsilon_np1 = self.epsilon + 1. - epsilon_np2 = self.epsilon + 1. - self.iterations = 0 - self.np1 = [self.tau_tilde.copy()] - self.np2 = [self.v_tilde.copy()] - while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon: - update_order = np.random.permutation(self.N) - for i in update_order: - #Cavity distribution parameters - self.tau_[i] = 1./Sigma[i,i] - self.eta*self.tau_tilde[i] - self.v_[i] = mu[i]/Sigma[i,i] - self.eta*self.v_tilde[i] - #Marginal moments - self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.noise_model_list[self.index[i]].moments_match(self._transf_data[i],self.tau_[i],self.v_[i]) - #Site parameters update - Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma[i,i]) - Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma[i,i]) - self.tau_tilde[i] += Delta_tau - self.v_tilde[i] += Delta_v - #Posterior distribution parameters update - DSYR(Sigma,Sigma[:,i].copy(), -float(Delta_tau/(1.+ Delta_tau*Sigma[i,i]))) - mu = np.dot(Sigma,self.v_tilde) - self.iterations += 1 - #Sigma recomptutation with Cholesky decompositon - Sroot_tilde_K = np.sqrt(self.tau_tilde)[:,None]*K - B = np.eye(self.N) + np.sqrt(self.tau_tilde)[None,:]*Sroot_tilde_K - L = jitchol(B) - V,info = dtrtrs(L,Sroot_tilde_K,lower=1) - Sigma = K - np.dot(V.T,V) - mu = np.dot(Sigma,self.v_tilde) - epsilon_np1 = sum((self.tau_tilde-self.np1[-1])**2)/self.N - epsilon_np2 = sum((self.v_tilde-self.np2[-1])**2)/self.N - self.np1.append(self.tau_tilde.copy()) - self.np2.append(self.v_tilde.copy()) - - return self._compute_GP_variables() - - def fit_DTC(self, Kmm, Kmn): - """ - The expectation-propagation algorithm with sparse pseudo-input. - For nomenclature see ... 2013. - """ - num_inducing = Kmm.shape[0] - - #TODO: this doesn't work with uncertain inputs! 
- - """ - Prior approximation parameters: - q(f|X) = int_{df}{N(f|KfuKuu_invu,diag(Kff-Qff)*N(u|0,Kuu)} = N(f|0,Sigma0) - Sigma0 = Qnn = Knm*Kmmi*Kmn - """ - KmnKnm = np.dot(Kmn,Kmn.T) - Lm = jitchol(Kmm) - Lmi = chol_inv(Lm) - Kmmi = np.dot(Lmi.T,Lmi) - KmmiKmn = np.dot(Kmmi,Kmn) - Qnn_diag = np.sum(Kmn*KmmiKmn,-2) - LLT0 = Kmm.copy() - - #Kmmi, Lm, Lmi, Kmm_logdet = pdinv(Kmm) - #KmnKnm = np.dot(Kmn, Kmn.T) - #KmmiKmn = np.dot(Kmmi,Kmn) - #Qnn_diag = np.sum(Kmn*KmmiKmn,-2) - #LLT0 = Kmm.copy() - - """ - Posterior approximation: q(f|y) = N(f| mu, Sigma) - Sigma = Diag + P*R.T*R*P.T + K - mu = w + P*Gamma - """ - mu = np.zeros(self.N) - LLT = Kmm.copy() - Sigma_diag = Qnn_diag.copy() - - """ - Initial values - Cavity distribution parameters: - q_(g|mu_,sigma2_) = Product{q_i(g|mu_i,sigma2_i)} - sigma_ = 1./tau_ - mu_ = v_/tau_ - """ - self.tau_ = np.empty(self.N,dtype=float) - self.v_ = np.empty(self.N,dtype=float) - - #Initial values - Marginal moments - z = np.empty(self.N,dtype=float) - self.Z_hat = np.empty(self.N,dtype=float) - phi = np.empty(self.N,dtype=float) - mu_hat = np.empty(self.N,dtype=float) - sigma2_hat = np.empty(self.N,dtype=float) - - #Approximation - epsilon_np1 = 1 - epsilon_np2 = 1 - self.iterations = 0 - np1 = [self.tau_tilde.copy()] - np2 = [self.v_tilde.copy()] - while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon: - update_order = np.random.permutation(self.N) - for i in update_order: - #Cavity distribution parameters - self.tau_[i] = 1./Sigma_diag[i] - self.eta*self.tau_tilde[i] - self.v_[i] = mu[i]/Sigma_diag[i] - self.eta*self.v_tilde[i] - #Marginal moments - self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.noise_model_list[self.index[i]].moments_match(self._transf_data[i],self.tau_[i],self.v_[i]) - #Site parameters update - Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i]) - Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i]) - self.tau_tilde[i] += Delta_tau - self.v_tilde[i] += Delta_v - #Posterior distribution parameters update - DSYR(LLT,Kmn[:,i].copy(),Delta_tau) #LLT = LLT + np.outer(Kmn[:,i],Kmn[:,i])*Delta_tau - L = jitchol(LLT) - #cholUpdate(L,Kmn[:,i]*np.sqrt(Delta_tau)) - V,info = dtrtrs(L,Kmn,lower=1) - Sigma_diag = np.sum(V*V,-2) - si = np.sum(V.T*V[:,i],-1) - mu += (Delta_v-Delta_tau*mu[i])*si - self.iterations += 1 - #Sigma recomputation with Cholesky decompositon - LLT = LLT0 + np.dot(Kmn*self.tau_tilde[None,:],Kmn.T) - L = jitchol(LLT) - V,info = dtrtrs(L,Kmn,lower=1) - V2,info = dtrtrs(L.T,V,lower=0) - Sigma_diag = np.sum(V*V,-2) - Knmv_tilde = np.dot(Kmn,self.v_tilde) - mu = np.dot(V2.T,Knmv_tilde) - epsilon_np1 = sum((self.tau_tilde-np1[-1])**2)/self.N - epsilon_np2 = sum((self.v_tilde-np2[-1])**2)/self.N - np1.append(self.tau_tilde.copy()) - np2.append(self.v_tilde.copy()) - - self._compute_GP_variables() - - def fit_FITC(self, Kmm, Kmn, Knn_diag): - """ - The expectation-propagation algorithm with sparse pseudo-input. - For nomenclature see Naish-Guzman and Holden, 2008. 
- """ - num_inducing = Kmm.shape[0] - - """ - Prior approximation parameters: - q(f|X) = int_{df}{N(f|KfuKuu_invu,diag(Kff-Qff)*N(u|0,Kuu)} = N(f|0,Sigma0) - Sigma0 = diag(Knn-Qnn) + Qnn, Qnn = Knm*Kmmi*Kmn - """ - Lm = jitchol(Kmm) - Lmi = chol_inv(Lm) - Kmmi = np.dot(Lmi.T,Lmi) - P0 = Kmn.T - KmnKnm = np.dot(P0.T, P0) - KmmiKmn = np.dot(Kmmi,P0.T) - Qnn_diag = np.sum(P0.T*KmmiKmn,-2) - Diag0 = Knn_diag - Qnn_diag - R0 = jitchol(Kmmi).T - - """ - Posterior approximation: q(f|y) = N(f| mu, Sigma) - Sigma = Diag + P*R.T*R*P.T + K - mu = w + P*Gamma - """ - self.w = np.zeros(self.N) - self.Gamma = np.zeros(num_inducing) - mu = np.zeros(self.N) - P = P0.copy() - R = R0.copy() - Diag = Diag0.copy() - Sigma_diag = Knn_diag - RPT0 = np.dot(R0,P0.T) - - """ - Initial values - Cavity distribution parameters: - q_(g|mu_,sigma2_) = Product{q_i(g|mu_i,sigma2_i)} - sigma_ = 1./tau_ - mu_ = v_/tau_ - """ - self.tau_ = np.empty(self.N,dtype=float) - self.v_ = np.empty(self.N,dtype=float) - - #Initial values - Marginal moments - z = np.empty(self.N,dtype=float) - self.Z_hat = np.empty(self.N,dtype=float) - phi = np.empty(self.N,dtype=float) - mu_hat = np.empty(self.N,dtype=float) - sigma2_hat = np.empty(self.N,dtype=float) - - #Approximation - epsilon_np1 = 1 - epsilon_np2 = 1 - self.iterations = 0 - self.np1 = [self.tau_tilde.copy()] - self.np2 = [self.v_tilde.copy()] - while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon: - update_order = np.random.permutation(self.N) - for i in update_order: - #Cavity distribution parameters - self.tau_[i] = 1./Sigma_diag[i] - self.eta*self.tau_tilde[i] - self.v_[i] = mu[i]/Sigma_diag[i] - self.eta*self.v_tilde[i] - #Marginal moments - self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.noise_model_list[self.index[i]].moments_match(self._transf_data[i],self.tau_[i],self.v_[i]) - #Site parameters update - Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i]) - Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i]) - self.tau_tilde[i] += Delta_tau - self.v_tilde[i] += Delta_v - #Posterior distribution parameters update - dtd1 = Delta_tau*Diag[i] + 1. - dii = Diag[i] - Diag[i] = dii - (Delta_tau * dii**2.)/dtd1 - pi_ = P[i,:].reshape(1,num_inducing) - P[i,:] = pi_ - (Delta_tau*dii)/dtd1 * pi_ - Rp_i = np.dot(R,pi_.T) - RTR = np.dot(R.T,np.dot(np.eye(num_inducing) - Delta_tau/(1.+Delta_tau*Sigma_diag[i]) * np.dot(Rp_i,Rp_i.T),R)) - R = jitchol(RTR).T - self.w[i] += (Delta_v - Delta_tau*self.w[i])*dii/dtd1 - self.Gamma += (Delta_v - Delta_tau*mu[i])*np.dot(RTR,P[i,:].T) - RPT = np.dot(R,P.T) - Sigma_diag = Diag + np.sum(RPT.T*RPT.T,-1) - mu = self.w + np.dot(P,self.Gamma) - self.iterations += 1 - #Sigma recomptutation with Cholesky decompositon - Iplus_Dprod_i = 1./(1.+ Diag0 * self.tau_tilde) - Diag = Diag0 * Iplus_Dprod_i - P = Iplus_Dprod_i[:,None] * P0 - safe_diag = np.where(Diag0 < self.tau_tilde, self.tau_tilde/(1.+Diag0*self.tau_tilde), (1. 
- Iplus_Dprod_i)/Diag0) - L = jitchol(np.eye(num_inducing) + np.dot(RPT0,safe_diag[:,None]*RPT0.T)) - R,info = dtrtrs(L,R0,lower=1) - RPT = np.dot(R,P.T) - Sigma_diag = Diag + np.sum(RPT.T*RPT.T,-1) - self.w = Diag * self.v_tilde - self.Gamma = np.dot(R.T, np.dot(RPT,self.v_tilde)) - mu = self.w + np.dot(P,self.Gamma) - epsilon_np1 = sum((self.tau_tilde-self.np1[-1])**2)/self.N - epsilon_np2 = sum((self.v_tilde-self.np2[-1])**2)/self.N - self.np1.append(self.tau_tilde.copy()) - self.np2.append(self.v_tilde.copy()) - - return self._compute_GP_variables() diff --git a/GPy/likelihoods/gaussian.py b/GPy/likelihoods/gaussian.py deleted file mode 100644 index e68bfafa..00000000 --- a/GPy/likelihoods/gaussian.py +++ /dev/null @@ -1,110 +0,0 @@ -import numpy as np -from likelihood import likelihood -from ..util.linalg import jitchol -from ..core.parameter import Param - - -class Gaussian(likelihood): - """ - Likelihood class for doing Expectation propagation - - :param data: observed output - :type data: Nx1 numpy.darray - :param variance: noise parameter - :param normalize: whether to normalize the data before computing (predictions will be in original scales) - :type normalize: False|True - """ - def __init__(self, data, variance=1., normalize=False): - super(Gaussian, self).__init__('gaussian') - self.is_heteroscedastic = False - #self.num_params = 1 - self.Z = 0. # a correction factor which accounts for the approximation made - N, self.output_dim = data.shape - - # normalization - if normalize: - self._offset = data.mean(0)[None, :] - self._scale = data.std(0)[None, :] - # Don't scale outputs which have zero variance to zero. - self._scale[np.nonzero(self._scale == 0.)] = 1.0e-3 - else: - self._offset = np.zeros((1, self.output_dim)) - self._scale = np.ones((1, self.output_dim)) - - self.set_data(data) - - self.variance = Param('variance', variance) - self.variance.add_observer(self, self.update_variance) - self.add_parameter(self.variance) - - #self.parameters_changed() -# self._set_params(np.asarray(variance)) - - - def set_data(self, data): - self.data = data - self.N, D = data.shape - assert D == self.output_dim - self.Y = (self.data - self._offset) / self._scale - if D > self.N: - self.YYT = np.dot(self.Y, self.Y.T) - self.trYYT = np.trace(self.YYT) - self.YYT_factor = jitchol(self.YYT) - else: - self.YYT = None - self.trYYT = np.sum(np.square(self.Y)) - self.YYT_factor = self.Y - -# def _get_params(self): -# return np.asarray(self._variance) -# -# def _get_param_names(self): -# return ["noise_variance"] -# -# def _set_params(self, x): -# self.variance = x[0] - - def update_variance(self, v): - if np.all(self.variance == 0.): #special case of zero noise - self.precision = np.inf - self.V = None - else: - self.precision = (1. / self.variance).squeeze() - self.V = (self.precision) * self.Y - self.VVT_factor = self.precision * self.YYT_factor - self.covariance_matrix = np.eye(self.N) * self.variance - #self._variance = self.variance.copy() - -# def parameters_changed(self): -# if np.any(self._variance != self.variance): -# self.update_variance() - - def predictive_values(self, mu, var, full_cov): - """ - Un-normalize the prediction and add the likelihood variance, then return the 5%, 95% interval - """ - mean = mu * self._scale + self._offset - if full_cov: - if self.output_dim > 1: - raise NotImplementedError, "TODO" - # Note. for output_dim>1, we need to re-normalise all the outputs independently. - # This will mess up computations of diag(true_var), below. 
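The normalization bookkeeping in `Gaussian` above (fit on `(data - offset) / scale`, invert at prediction time and add the likelihood noise) is easy to exercise standalone. A minimal sketch with toy numbers, mirroring the `mean`/`true_var` formulas in this method (names here are local, not GPy API):

```python
import numpy as np

# Normalize targets for fitting, then un-normalize predictions.
Y = np.array([[2.0], [4.0], [6.0]])
offset, scale = Y.mean(0)[None, :], Y.std(0)[None, :]
Yn = (Y - offset) / scale                          # what the GP is fit to

# A latent GP prediction in normalized space, plus the noise variance:
mu_n, var_n, noise_var = np.array([[0.1]]), np.array([[0.2]]), 0.05
mean = mu_n * scale + offset                       # back to original units
true_var = (var_n + noise_var) * scale ** 2        # augment and rescale
lo, hi = mean - 2 * np.sqrt(true_var), mean + 2 * np.sqrt(true_var)
print(mean, true_var, lo, hi)                      # the +/- 2 sigma interval returned above
```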
-                # note that the upper, lower quantiles should be the same shape as mean
-            # Augment the output variance with the likelihood variance and rescale.
-            true_var = (var + np.eye(var.shape[0]) * self.variance) * self._scale ** 2
-            _5pc = mean - 2.*np.sqrt(np.diag(true_var))
-            _95pc = mean + 2.*np.sqrt(np.diag(true_var))
-        else:
-            true_var = (var + self.variance) * self._scale ** 2
-            _5pc = mean - 2.*np.sqrt(true_var)
-            _95pc = mean + 2.*np.sqrt(true_var)
-        return mean, true_var, _5pc, _95pc
-
-    def fit_full(self):
-        """
-        No approximations needed
-        """
-        pass
-
-    def _gradients(self, partial):
-        return np.sum(partial)
diff --git a/GPy/likelihoods/gaussian_mixed_noise.py b/GPy/likelihoods/gaussian_mixed_noise.py
deleted file mode 100644
index 696867c0..00000000
--- a/GPy/likelihoods/gaussian_mixed_noise.py
+++ /dev/null
@@ -1,108 +0,0 @@
-# Copyright (c) 2013, Ricardo Andrade
-# Licensed under the BSD 3-clause license (see LICENSE.txt)
-
-
-import numpy as np
-from scipy import stats
-from ..util.linalg import pdinv,mdot,jitchol,chol_inv,DSYR,tdot,dtrtrs
-from likelihood import likelihood
-from . import Gaussian
-
-
-class Gaussian_Mixed_Noise(likelihood):
-    """
-    Gaussian Likelihood for multiple outputs
-
-    This is a wrapper around likelihood.Gaussian class
-
-    :param data_list: data observations
-    :type data_list: list of numpy arrays (num_data_output_i x 1), one array per output
-    :param noise_params: noise parameters of each output
-    :type noise_params: list of floats, one per output
-    :param normalize: whether to normalize the data before computing (predictions will be in original scales)
-    :type normalize: False|True
-    """
-    def __init__(self, data_list, noise_params=None, normalize=True):
-        self.num_params = len(data_list)
-        self.n_list = [data.size for data in data_list]
-        self.index = np.vstack([np.repeat(i,n)[:,None] for i,n in zip(range(self.num_params),self.n_list)])
-
-        if noise_params is None:
-            noise_params = [1.] * self.num_params
-        else:
-            assert self.num_params == len(noise_params), 'Number of noise parameters does not match the number of noise models.'
-
-        self.noise_model_list = [Gaussian(Y,variance=v,normalize = normalize) for Y,v in zip(data_list,noise_params)]
-        self.n_params = [noise_model._get_params().size for noise_model in self.noise_model_list]
-        self.data = np.vstack(data_list)
-        self.N, self.output_dim = self.data.shape
-        self._offset = np.zeros((1, self.output_dim))
-        self._scale = np.ones((1, self.output_dim))
-
-        self.is_heteroscedastic = True
-        self.Z = 0. # a correction factor which accounts for the approximation made
-
-        self.set_data(data_list)
-        self._set_params(np.asarray(noise_params))
-
-        super(Gaussian_Mixed_Noise, self).__init__()
-
-    def set_data(self, data_list):
-        self.data = np.vstack(data_list)
-        self.N, D = self.data.shape
-        assert D == self.output_dim
-        self.Y = (self.data - self._offset) / self._scale
-        if D > self.N:
-            raise NotImplementedError
-            #self.YYT = np.dot(self.Y, self.Y.T)
-            #self.trYYT = np.trace(self.YYT)
-            #self.YYT_factor = jitchol(self.YYT)
-        else:
-            self.YYT = None
-            self.trYYT = np.sum(np.square(self.Y))
-            self.YYT_factor = self.Y
-
-    def predictive_values(self,mu,var,full_cov,noise_model):
-        """
-        Predicts the output given the GP
-
-        :param mu: GP's mean
-        :param var: GP's variance
-        :param full_cov: whether to return the full covariance matrix, or just the diagonal
-        :type full_cov: False|True
-        :param noise_model: noise model to use
-        :type noise_model: integer
-        """
-        if full_cov:
-            raise NotImplementedError, "Cannot make correlated predictions with an EP likelihood"
-        return self.noise_model_list[noise_model].predictive_values(mu,var,full_cov)
-
-    def _get_params(self):
-        return np.hstack([noise_model._get_params().flatten() for noise_model in self.noise_model_list])
-
-    def _get_param_names(self):
-        if len(self.noise_model_list) == 1:
-            names = self.noise_model_list[0]._get_param_names()
-        else:
-            names = []
-            for noise_model,i in zip(self.noise_model_list,range(len(self.n_list))):
-                names.append(''.join(noise_model._get_param_names() + ['_%s' %i]))
-        return names
-
-    def _set_params(self,p):
-        cs_params = np.cumsum([0]+self.n_params)
-
-        for i in range(len(self.n_params)):
-            self.noise_model_list[i]._set_params(p[cs_params[i]:cs_params[i+1]])
-        self.precision = np.hstack([np.repeat(noise_model.precision,n) for noise_model,n in zip(self.noise_model_list,self.n_list)])[:,None]
-
-        self.V = self.precision * self.Y
-        self.VVT_factor = self.precision * self.YYT_factor
-        self.covariance_matrix = np.eye(self.N) * 1./self.precision
-
-    def _gradients(self,partial):
-        gradients = []
-        aux = np.cumsum([0]+self.n_list)
-        for ai,af,noise_model in zip(aux[:-1],aux[1:],self.noise_model_list):
-            gradients += [noise_model._gradients(partial[ai:af])]
-        return np.hstack(gradients)
diff --git a/GPy/likelihoods/likelihood.py b/GPy/likelihoods/likelihood.py
deleted file mode 100644
index d94af758..00000000
--- a/GPy/likelihoods/likelihood.py
+++ /dev/null
@@ -1,43 +0,0 @@
-import numpy as np
-import copy
-from ..core.parameterized import Parameterized
-
-class likelihood(Parameterized):
-    """
-    The atom for a likelihood class
-
-    This object interfaces the GP and the data. The most basic likelihood
-    (Gaussian) inherits directly from this, as does the EP algorithm
-
-    Some things must be defined for this to work properly:
-
-    - self.Y : the effective Gaussian target of the GP
-    - self.N, self.D : Y.shape
-    - self.covariance_matrix : the effective (noise) covariance of the GP targets
-    - self.Z : a factor which gets added to the likelihood (0 for a Gaussian, Z_EP for EP)
-    - self.is_heteroscedastic : enables significant computational savings in GP
-    - self.precision : a scalar or vector representation of the effective target precision
-    - self.YYT : (optional) = np.dot(self.Y, self.Y.T) enables computational savings for D>N
-    - self.V : self.precision * self.Y
-
-    """
-    def __init__(self, name=None):
-        Parameterized.__init__(self, name)
-
-#    def _get_params(self):
-#        raise NotImplementedError
-#
-#    def _get_param_names(self):
-#        raise NotImplementedError
-#
-#    def _set_params(self, x):
-#        raise NotImplementedError
-
-    def fit(self):
-        raise NotImplementedError
-
-    def _gradients(self, partial):
-        raise NotImplementedError
-
-    def predictive_values(self, mu, var):
-        raise NotImplementedError
diff --git a/GPy/likelihoods/noise_model_constructors.py b/GPy/likelihoods/noise_model_constructors.py
deleted file mode 100644
index ec971e04..00000000
--- a/GPy/likelihoods/noise_model_constructors.py
+++ /dev/null
@@ -1,88 +0,0 @@
-# Copyright (c) 2013, GPy authors (see AUTHORS.txt).
-# Licensed under the BSD 3-clause license (see LICENSE.txt)
-
-import numpy as np
-import noise_models
-
-def binomial(gp_link=None):
-    """
-    Construct a binomial likelihood
-
-    :param gp_link: a GPy gp_link function
-    """
-    if gp_link is None:
-        gp_link = noise_models.gp_transformations.Probit()
-    #else:
-    #    assert isinstance(gp_link,noise_models.gp_transformations.GPTransformation), 'gp_link function is not valid.'
-
-    if isinstance(gp_link,noise_models.gp_transformations.Probit):
-        analytical_mean = True
-        analytical_variance = False
-
-    elif isinstance(gp_link,noise_models.gp_transformations.Heaviside):
-        analytical_mean = True
-        analytical_variance = True
-
-    else:
-        analytical_mean = False
-        analytical_variance = False
-
-    return noise_models.binomial_noise.Binomial(gp_link,analytical_mean,analytical_variance)
-
-def exponential(gp_link=None):
-    """
-    Construct an exponential likelihood
-
-    :param gp_link: a GPy gp_link function
-    """
-    if gp_link is None:
-        gp_link = noise_models.gp_transformations.Identity()
-
-    analytical_mean = False
-    analytical_variance = False
-    return noise_models.exponential_noise.Exponential(gp_link,analytical_mean,analytical_variance)
-
-def gaussian_ep(gp_link=None,variance=1.):
-    """
-    Construct a gaussian likelihood
-
-    :param gp_link: a GPy gp_link function
-    :param variance: scalar
-    """
-    if gp_link is None:
-        gp_link = noise_models.gp_transformations.Identity()
-    #else:
-    #    assert isinstance(gp_link,noise_models.gp_transformations.GPTransformation), 'gp_link function is not valid.'
-
-    analytical_mean = False
-    analytical_variance = False
-    return noise_models.gaussian_noise.Gaussian(gp_link,analytical_mean,analytical_variance,variance)
-
-def poisson(gp_link=None):
-    """
-    Construct a Poisson likelihood
-
-    :param gp_link: a GPy gp_link function
-    """
-    if gp_link is None:
-        gp_link = noise_models.gp_transformations.Log_ex_1()
-    #else:
-    #    assert isinstance(gp_link,noise_models.gp_transformations.GPTransformation), 'gp_link function is not valid.'
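The constructors in this deleted module all follow one pattern: default the link when none is given, then record which predictive quantities are analytic for that link. A minimal standalone sketch of that pattern (`Probit`/`Heaviside` here are hypothetical stand-ins for the `gp_transformations` classes, and the tuple stands in for the `noise_models.*` instance actually returned):

```python
# Sketch of the constructor pattern, under assumed stand-in classes.
class Probit(object): pass
class Heaviside(object): pass

def binomial(gp_link=None):
    if gp_link is None:
        gp_link = Probit()                                  # default link
    # Which predictive quantities have closed forms depends on the link:
    analytical_mean = isinstance(gp_link, (Probit, Heaviside))
    analytical_variance = isinstance(gp_link, Heaviside)
    return (gp_link, analytical_mean, analytical_variance)

print(binomial())   # probit: analytic mean, numerical variance
```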
-    analytical_mean = False
-    analytical_variance = False
-    return noise_models.poisson_noise.Poisson(gp_link,analytical_mean,analytical_variance)
-
-def gamma(gp_link=None,beta=1.):
-    """
-    Construct a Gamma likelihood
-
-    :param gp_link: a GPy gp_link function
-    :param beta: scalar
-    """
-    if gp_link is None:
-        gp_link = noise_models.gp_transformations.Log_ex_1()
-    analytical_mean = False
-    analytical_variance = False
-    return noise_models.gamma_noise.Gamma(gp_link,analytical_mean,analytical_variance,beta)
-
-
diff --git a/GPy/likelihoods/noise_models/__init__.py b/GPy/likelihoods/noise_models/__init__.py
deleted file mode 100644
index b47702a7..00000000
--- a/GPy/likelihoods/noise_models/__init__.py
+++ /dev/null
@@ -1,7 +0,0 @@
-import noise_distributions
-import binomial_noise
-import exponential_noise
-import gaussian_noise
-import gamma_noise
-import poisson_noise
-import gp_transformations
diff --git a/GPy/likelihoods/noise_models/binomial_noise.py b/GPy/likelihoods/noise_models/binomial_noise.py
deleted file mode 100644
index c0bb8be4..00000000
--- a/GPy/likelihoods/noise_models/binomial_noise.py
+++ /dev/null
@@ -1,132 +0,0 @@
-# Copyright (c) 2012, 2013 Ricardo Andrade
-# Licensed under the BSD 3-clause license (see LICENSE.txt)
-
-
-import numpy as np
-from scipy import stats,special
-import scipy as sp
-from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
-import gp_transformations
-from noise_distributions import NoiseDistribution
-
-class Binomial(NoiseDistribution):
-    """
-    Probit likelihood
-    Y is expected to take values in {-1,1}
-    -----
-    $$
-    L(x) = \\Phi (Y_i*f_i)
-    $$
-    """
-    def __init__(self,gp_link=None,analytical_mean=False,analytical_variance=False):
-        super(Binomial, self).__init__(gp_link,analytical_mean,analytical_variance)
-
-    def _preprocess_values(self,Y):
-        """
-        Check if the values of the observations correspond to the values
-        assumed by the likelihood function.
-
-        ..Note:: Binary classification algorithm works better with classes {-1,1}
-        """
-        Y_prep = Y.copy()
-        Y1 = Y[Y.flatten()==1].size
-        Y2 = Y[Y.flatten()==0].size
-        assert Y1 + Y2 == Y.size, 'Binomial likelihood is meant to be used only with outputs in {0,1}.'
-        Y_prep[Y.flatten() == 0] = -1
-        return Y_prep
-
-    def _moments_match_analytical(self,data_i,tau_i,v_i):
-        """
-        Moments match of the marginal approximation in EP algorithm
-
-        :param i: number of observation (int)
-        :param tau_i: precision of the cavity distribution (float)
-        :param v_i: mean/variance of the cavity distribution (float)
-        """
-        if isinstance(self.gp_link,gp_transformations.Probit):
-            z = data_i*v_i/np.sqrt(tau_i**2 + tau_i)
-            Z_hat = std_norm_cdf(z)
-            phi = std_norm_pdf(z)
-            mu_hat = v_i/tau_i + data_i*phi/(Z_hat*np.sqrt(tau_i**2 + tau_i))
-            sigma2_hat = 1./tau_i - (phi/((tau_i**2+tau_i)*Z_hat))*(z+phi/Z_hat)
-
-        elif isinstance(self.gp_link,gp_transformations.Heaviside):
-            a = data_i*v_i/np.sqrt(tau_i)
-            Z_hat = std_norm_cdf(a)
-            N = std_norm_pdf(a)
-            mu_hat = v_i/tau_i + data_i*N/Z_hat/np.sqrt(tau_i)
-            sigma2_hat = (1. - a*N/Z_hat - np.square(N/Z_hat))/tau_i
-            if np.any(np.isnan([Z_hat, mu_hat, sigma2_hat])):
-                stop
-
-        return Z_hat, mu_hat, sigma2_hat
-
-    def _predictive_mean_analytical(self,mu,sigma):
-        if isinstance(self.gp_link,gp_transformations.Probit):
-            return stats.norm.cdf(mu/np.sqrt(1+sigma**2))
-        elif isinstance(self.gp_link,gp_transformations.Heaviside):
-            return stats.norm.cdf(mu/sigma)
-        else:
-            raise NotImplementedError
-
-    def _predictive_variance_analytical(self,mu,sigma, pred_mean):
-        if isinstance(self.gp_link,gp_transformations.Heaviside):
-            return 0.
-        else:
-            raise NotImplementedError
-
-    def _mass(self,gp,obs):
-        #NOTE obs must be in {0,1}
-        p = self.gp_link.transf(gp)
-        return p**obs * (1.-p)**(1.-obs)
-
-    def _nlog_mass(self,gp,obs):
-        p = self.gp_link.transf(gp)
-        return obs*np.log(p) + (1.-obs)*np.log(1-p)
-
-    def _dnlog_mass_dgp(self,gp,obs):
-        p = self.gp_link.transf(gp)
-        dp = self.gp_link.dtransf_df(gp)
-        return obs/p * dp - (1.-obs)/(1.-p) * dp
-
-    def _d2nlog_mass_dgp2(self,gp,obs):
-        p = self.gp_link.transf(gp)
-        return (obs/p + (1.-obs)/(1.-p))*self.gp_link.d2transf_df2(gp) + ((1.-obs)/(1.-p)**2-obs/p**2)*self.gp_link.dtransf_df(gp)
-
-    def _mean(self,gp):
-        """
-        Mass (or density) function
-        """
-        return self.gp_link.transf(gp)
-
-    def _dmean_dgp(self,gp):
-        return self.gp_link.dtransf_df(gp)
-
-    def _d2mean_dgp2(self,gp):
-        return self.gp_link.d2transf_df2(gp)
-
-    def _variance(self,gp):
-        """
-        Mass (or density) function
-        """
-        p = self.gp_link.transf(gp)
-        return p*(1.-p)
-
-    def _dvariance_dgp(self,gp):
-        return self.gp_link.dtransf_df(gp)*(1. - 2.*self.gp_link.transf(gp))
-
-    def _d2variance_dgp2(self,gp):
-        return self.gp_link.d2transf_df2(gp)*(1. - 2.*self.gp_link.transf(gp)) - 2*self.gp_link.dtransf_df(gp)**2
-
-
-    def samples(self, gp):
-        """
-        Returns a set of samples of observations based on a given value of the latent variable.
-
-        :param size: number of samples to compute
-        :param gp: latent variable
-        """
-        orig_shape = gp.shape
-        gp = gp.flatten()
-        Ysim = np.array([np.random.binomial(1,self.gp_link.transf(gpj),size=1) for gpj in gp])
-        return Ysim.reshape(orig_shape)
diff --git a/GPy/likelihoods/noise_models/exponential_noise.py b/GPy/likelihoods/noise_models/exponential_noise.py
deleted file mode 100644
index 56e63c75..00000000
--- a/GPy/likelihoods/noise_models/exponential_noise.py
+++ /dev/null
@@ -1,68 +0,0 @@
-# Copyright (c) 2012, 2013 Ricardo Andrade
-# Licensed under the BSD 3-clause license (see LICENSE.txt)
-
-
-import numpy as np
-from scipy import stats,special
-import scipy as sp
-from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
-import gp_transformations
-from noise_distributions import NoiseDistribution
-
-class Exponential(NoiseDistribution):
-    """
-    Exponential likelihood
-    Y is expected to take non-negative real values
-    -----
-    $$
-    L(x) = \exp(-Y_i/\lambda)/\lambda
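The closed-form probit moments in `Binomial._moments_match_analytical` above can be spot-checked against one-dimensional quadrature over the tilted distribution. A hedged standalone sketch (arbitrary cavity values; not GPy API):

```python
import numpy as np
from scipy import stats, integrate

# Cavity natural parameters and a label in {-1, 1}
y, tau, v = 1.0, 2.0, 0.5
mu, s2 = v / tau, 1. / tau

# Closed form, as in the Probit branch above
z = y * v / np.sqrt(tau ** 2 + tau)
Z_hat = stats.norm.cdf(z)
mu_hat = v / tau + y * stats.norm.pdf(z) / (Z_hat * np.sqrt(tau ** 2 + tau))

# Quadrature over the tilted distribution Phi(y*f) * N(f | mu, s2)
tilted = lambda f: stats.norm.cdf(y * f) * stats.norm.pdf(f, mu, np.sqrt(s2))
Z_num, _ = integrate.quad(tilted, -10, 10)
m1, _ = integrate.quad(lambda f: f * tilted(f), -10, 10)
print(Z_hat, Z_num)        # ~0.581 from both
print(mu_hat, m1 / Z_num)  # first moments agree
```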
-    $$
-    """
-    def __init__(self,gp_link=None,analytical_mean=False,analytical_variance=False):
-        super(Exponential, self).__init__(gp_link,analytical_mean,analytical_variance)
-
-    def _preprocess_values(self,Y):
-        return Y
-
-    def _mass(self,gp,obs):
-        """
-        Mass (or density) function
-        """
-        return np.exp(-obs/self.gp_link.transf(gp))/self.gp_link.transf(gp)
-
-    def _nlog_mass(self,gp,obs):
-        """
-        Negative logarithm of the un-normalized distribution: factors that are not a function of gp are omitted
-        """
-        return obs/self.gp_link.transf(gp) + np.log(self.gp_link.transf(gp))
-
-    def _dnlog_mass_dgp(self,gp,obs):
-        return ( 1./self.gp_link.transf(gp) - obs/self.gp_link.transf(gp)**2) * self.gp_link.dtransf_df(gp)
-
-    def _d2nlog_mass_dgp2(self,gp,obs):
-        fgp = self.gp_link.transf(gp)
-        return (2*obs/fgp**3 - 1./fgp**2) * self.gp_link.dtransf_df(gp)**2 + ( 1./fgp - obs/fgp**2) * self.gp_link.d2transf_df2(gp)
-
-    def _mean(self,gp):
-        """
-        Mass (or density) function
-        """
-        return self.gp_link.transf(gp)
-
-    def _dmean_dgp(self,gp):
-        return self.gp_link.dtransf_df(gp)
-
-    def _d2mean_dgp2(self,gp):
-        return self.gp_link.d2transf_df2(gp)
-
-    def _variance(self,gp):
-        """
-        Mass (or density) function
-        """
-        return self.gp_link.transf(gp)**2
-
-    def _dvariance_dgp(self,gp):
-        return 2*self.gp_link.transf(gp)*self.gp_link.dtransf_df(gp)
-
-    def _d2variance_dgp2(self,gp):
-        return 2 * (self.gp_link.dtransf_df(gp)**2 + self.gp_link.transf(gp)*self.gp_link.d2transf_df2(gp))
diff --git a/GPy/likelihoods/noise_models/gamma_noise.py b/GPy/likelihoods/noise_models/gamma_noise.py
deleted file mode 100644
index 6bf0dd7b..00000000
--- a/GPy/likelihoods/noise_models/gamma_noise.py
+++ /dev/null
@@ -1,71 +0,0 @@
-# Copyright (c) 2012, 2013 Ricardo Andrade
-# Licensed under the BSD 3-clause license (see LICENSE.txt)
-
-
-import numpy as np
-from scipy import stats,special
-import scipy as sp
-from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
-import gp_transformations
-from noise_distributions import NoiseDistribution
-
-class Gamma(NoiseDistribution):
-    """
-    Gamma likelihood
-    Y is expected to take positive real values
-    -----
-    $$
-    L(x) = \\beta^{\\alpha} Y_i^{\\alpha-1} e^{-\\beta Y_i} / \Gamma(\\alpha)
-    $$
-    """
-    def __init__(self,gp_link=None,analytical_mean=False,analytical_variance=False,beta=1.):
-        self.beta = beta
-        super(Gamma, self).__init__(gp_link,analytical_mean,analytical_variance)
-
-    def _preprocess_values(self,Y):
-        return Y
-
-    def _mass(self,gp,obs):
-        """
-        Mass (or density) function
-        """
-        #return stats.gamma.pdf(obs,a = self.gp_link.transf(gp)/self.variance,scale=self.variance)
-        alpha = self.gp_link.transf(gp)*self.beta
-        return obs**(alpha - 1.) * np.exp(-self.beta*obs) * self.beta**alpha / special.gamma(alpha)
-
-    def _nlog_mass(self,gp,obs):
-        """
-        Negative logarithm of the un-normalized distribution: factors that are not a function of gp are omitted
-        """
-        alpha = self.gp_link.transf(gp)*self.beta
-        return (1. - alpha)*np.log(obs) + self.beta*obs - alpha * np.log(self.beta) + np.log(special.gamma(alpha))
-
-    def _dnlog_mass_dgp(self,gp,obs):
-        return -self.gp_link.dtransf_df(gp)*self.beta*np.log(obs) + special.psi(self.gp_link.transf(gp)*self.beta) * self.gp_link.dtransf_df(gp)*self.beta
-
-    def _d2nlog_mass_dgp2(self,gp,obs):
-        return -self.gp_link.d2transf_df2(gp)*self.beta*np.log(obs) + special.polygamma(1,self.gp_link.transf(gp)*self.beta)*(self.gp_link.dtransf_df(gp)*self.beta)**2 + special.psi(self.gp_link.transf(gp)*self.beta)*self.gp_link.d2transf_df2(gp)*self.beta
-
-    def _mean(self,gp):
-        """
-        Mass (or density) function
-        """
-        return self.gp_link.transf(gp)
-
-    def _dmean_dgp(self,gp):
-        return self.gp_link.dtransf_df(gp)
-
-    def _d2mean_dgp2(self,gp):
-        return self.gp_link.d2transf_df2(gp)
-
-    def _variance(self,gp):
-        """
-        Mass (or density) function
-        """
-        return self.gp_link.transf(gp)/self.beta
-
-    def _dvariance_dgp(self,gp):
-        return self.gp_link.dtransf_df(gp)/self.beta
-
-    def _d2variance_dgp2(self,gp):
-        return self.gp_link.d2transf_df2(gp)/self.beta
diff --git a/GPy/likelihoods/noise_models/gaussian_noise.py b/GPy/likelihoods/noise_models/gaussian_noise.py
deleted file mode 100644
index 93ac9acd..00000000
--- a/GPy/likelihoods/noise_models/gaussian_noise.py
+++ /dev/null
@@ -1,98 +0,0 @@
-# Copyright (c) 2012, 2013 Ricardo Andrade
-# Licensed under the BSD 3-clause license (see LICENSE.txt)
-
-import numpy as np
-from scipy import stats,special
-import scipy as sp
-from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
-import gp_transformations
-from noise_distributions import NoiseDistribution
-
-class Gaussian(NoiseDistribution):
-    """
-    Gaussian likelihood
-
-    :param mean: mean value of the Gaussian distribution
-    :param variance: variance of the Gaussian distribution
-    """
-    def __init__(self,gp_link=None,analytical_mean=False,analytical_variance=False,variance=1.):
-        self.variance = variance
-        super(Gaussian, self).__init__(gp_link,analytical_mean,analytical_variance)
-
-    def _get_params(self):
-        return np.array([self.variance])
-
-    def _get_param_names(self):
-        return ['noise_model_variance']
-
-    def _set_params(self,p):
-        self.variance = p
-
-    def _gradients(self,partial):
-        return np.zeros(1)
-        #return np.sum(partial)
-
-    def _preprocess_values(self,Y):
-        """
-        Check if the values of the observations correspond to the values
-        assumed by the likelihood function.
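The `_dnlog_mass_dgp` derivatives defined by these noise models (Exponential and Gamma above, Gaussian below) are easy to spot-check by finite differences. A standalone sketch for the Gaussian case with an identity link assumed (so `dtransf_df = 1`), not using GPy:

```python
import numpy as np

# -log N(obs | f, var) up to constants, and its gradient (cf. _nlog_mass below)
var, obs = 0.1, 1.3
nlog_mass = lambda f: .5 * ((f - obs) ** 2 / var + np.log(2. * np.pi * var))
dnlog_mass = lambda f: (f - obs) / var

f, eps = 0.7, 1e-6
numeric = (nlog_mass(f + eps) - nlog_mass(f - eps)) / (2 * eps)
print(dnlog_mass(f), numeric)   # should agree to ~1e-9
```

The same central-difference check applies to `_d2nlog_mass_dgp2` by differencing the analytic gradient instead of the objective.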
- """ - return Y - - def _moments_match_analytical(self,data_i,tau_i,v_i): - """ - Moments match of the marginal approximation in EP algorithm - - :param i: number of observation (int) - :param tau_i: precision of the cavity distribution (float) - :param v_i: mean/variance of the cavity distribution (float) - """ - sigma2_hat = 1./(1./self.variance + tau_i) - mu_hat = sigma2_hat*(data_i/self.variance + v_i) - sum_var = self.variance + 1./tau_i - Z_hat = 1./np.sqrt(2.*np.pi*sum_var)*np.exp(-.5*(data_i - v_i/tau_i)**2./sum_var) - return Z_hat, mu_hat, sigma2_hat - - def _predictive_mean_analytical(self,mu,sigma): - new_sigma2 = self.predictive_variance(mu,sigma) - return new_sigma2*(mu/sigma**2 + self.gp_link.transf(mu)/self.variance) - - def _predictive_variance_analytical(self,mu,sigma): - return 1./(1./self.variance + 1./sigma**2) - - def _mass(self,gp,obs): - #return std_norm_pdf( (self.gp_link.transf(gp)-obs)/np.sqrt(self.variance) ) - return stats.norm.pdf(obs,self.gp_link.transf(gp),np.sqrt(self.variance)) - - def _nlog_mass(self,gp,obs): - return .5*((self.gp_link.transf(gp)-obs)**2/self.variance + np.log(2.*np.pi*self.variance)) - - def _dnlog_mass_dgp(self,gp,obs): - return (self.gp_link.transf(gp)-obs)/self.variance * self.gp_link.dtransf_df(gp) - - def _d2nlog_mass_dgp2(self,gp,obs): - return ((self.gp_link.transf(gp)-obs)*self.gp_link.d2transf_df2(gp) + self.gp_link.dtransf_df(gp)**2)/self.variance - - def _mean(self,gp): - """ - Mass (or density) function - """ - return self.gp_link.transf(gp) - - def _dmean_dgp(self,gp): - return self.gp_link.dtransf_df(gp) - - def _d2mean_dgp2(self,gp): - return self.gp_link.d2transf_df2(gp) - - def _variance(self,gp): - """ - Mass (or density) function - """ - return self.variance - - def _dvariance_dgp(self,gp): - return 0 - - def _d2variance_dgp2(self,gp): - return 0 diff --git a/GPy/likelihoods/noise_models/gp_transformations.py b/GPy/likelihoods/noise_models/gp_transformations.py deleted file mode 100644 index e95e9df7..00000000 --- a/GPy/likelihoods/noise_models/gp_transformations.py +++ /dev/null @@ -1,133 +0,0 @@ -# Copyright (c) 2012, 2013 Ricardo Andrade -# Licensed under the BSD 3-clause license (see LICENSE.txt) - - -import numpy as np -from scipy import stats -import scipy as sp -import pylab as pb -from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf,inv_std_norm_cdf - -class GPTransformation(object): - """ - Link function class for doing non-Gaussian likelihoods approximation - - :param Y: observed output (Nx1 numpy.darray) - - .. note:: Y values allowed depend on the likelihood_function used - - """ - def __init__(self): - pass - - def transf(self,f): - """ - Gaussian process tranformation function, latent space -> output space - """ - pass - - def dtransf_df(self,f): - """ - derivative of transf(f) w.r.t. f - """ - pass - - def d2transf_df2(self,f): - """ - second derivative of transf(f) w.r.t. f - """ - pass - -class Identity(GPTransformation): - """ - .. math:: - - g(f) = f - - """ - def transf(self,f): - return f - - def dtransf_df(self,f): - return 1. - - def d2transf_df2(self,f): - return 0 - - -class Probit(GPTransformation): - """ - .. math:: - - g(f) = \\Phi^{-1} (mu) - - """ - def transf(self,f): - return std_norm_cdf(f) - - def dtransf_df(self,f): - return std_norm_pdf(f) - - def d2transf_df2(self,f): - return -f * std_norm_pdf(f) - -class Log(GPTransformation): - """ - .. 
diff --git a/GPy/likelihoods/noise_models/gp_transformations.py b/GPy/likelihoods/noise_models/gp_transformations.py
deleted file mode 100644
index e95e9df7..00000000
--- a/GPy/likelihoods/noise_models/gp_transformations.py
+++ /dev/null
@@ -1,133 +0,0 @@
-# Copyright (c) 2012, 2013 Ricardo Andrade
-# Licensed under the BSD 3-clause license (see LICENSE.txt)
-
-
-import numpy as np
-from scipy import stats
-import scipy as sp
-import pylab as pb
-from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf,inv_std_norm_cdf
-
-class GPTransformation(object):
-    """
-    Link function class for non-Gaussian likelihood approximations
-
-    :param Y: observed output (Nx1 numpy.darray)
-
-    .. note:: the Y values allowed depend on the likelihood_function used
-
-    """
-    def __init__(self):
-        pass
-
-    def transf(self,f):
-        """
-        Gaussian process transformation function, latent space -> output space
-        """
-        pass
-
-    def dtransf_df(self,f):
-        """
-        derivative of transf(f) w.r.t. f
-        """
-        pass
-
-    def d2transf_df2(self,f):
-        """
-        second derivative of transf(f) w.r.t. f
-        """
-        pass
-
-class Identity(GPTransformation):
-    """
-    .. math::
-
-        g(f) = f
-
-    """
-    def transf(self,f):
-        return f
-
-    def dtransf_df(self,f):
-        return 1.
-
-    def d2transf_df2(self,f):
-        return 0
-
-
-class Probit(GPTransformation):
-    """
-    .. math::
-
-        g(f) = \\Phi^{-1} (mu)
-
-    """
-    def transf(self,f):
-        return std_norm_cdf(f)
-
-    def dtransf_df(self,f):
-        return std_norm_pdf(f)
-
-    def d2transf_df2(self,f):
-        return -f * std_norm_pdf(f)
-
-class Log(GPTransformation):
-    """
-    .. math::
-
-        g(f) = \\log(\\mu)
-
-    """
-    def transf(self,f):
-        return np.exp(f)
-
-    def dtransf_df(self,f):
-        return np.exp(f)
-
-    def d2transf_df2(self,f):
-        return np.exp(f)
-
-class Log_ex_1(GPTransformation):
-    """
-    .. math::
-
-        g(f) = \\log(\\exp(\\mu) - 1)
-
-    """
-    def transf(self,f):
-        return np.log(1.+np.exp(f))
-
-    def dtransf_df(self,f):
-        return np.exp(f)/(1.+np.exp(f))
-
-    def d2transf_df2(self,f):
-        aux = np.exp(f)/(1.+np.exp(f))
-        return aux*(1.-aux)
-
-class Reciprocal(GPTransformation):
-    def transf(self,f):
-        return 1./f
-
-    def dtransf_df(self,f):
-        return -1./f**2
-
-    def d2transf_df2(self,f):
-        return 2./f**3
-
-class Heaviside(GPTransformation):
-    """
-
-    .. math::
-
-        g(f) = I_{x \\in A}
-
-    """
-    def transf(self,f):
-        return np.where(f>0, 1, 0)
-
-    def dtransf_df(self,f):
-        raise NotImplementedError, "This function is not differentiable!"
-
-    def d2transf_df2(self,f):
-        raise NotImplementedError, "This function is not differentiable!"
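Each transformation supplies its own first and second derivatives, which both the EP and Laplace machinery rely on, so a finite-difference check is a cheap safeguard. A standalone sketch using the softplus form of Log_ex_1 above (local names, not the GPy API):

import numpy as np

transf = lambda f: np.log(1. + np.exp(f))            # Log_ex_1.transf
dtransf_df = lambda f: np.exp(f) / (1. + np.exp(f))  # Log_ex_1.dtransf_df

f = np.linspace(-3., 3., 7)
eps = 1e-6
numeric = (transf(f + eps) - transf(f - eps)) / (2 * eps)
print(np.max(np.abs(numeric - dtransf_df(f))))       # ~1e-10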
diff --git a/GPy/likelihoods/noise_models/noise_distributions.py b/GPy/likelihoods/noise_models/noise_distributions.py
deleted file mode 100644
index 913c2b9d..00000000
--- a/GPy/likelihoods/noise_models/noise_distributions.py
+++ /dev/null
@@ -1,425 +0,0 @@
-# Copyright (c) 2012, 2013 Ricardo Andrade
-# Licensed under the BSD 3-clause license (see LICENSE.txt)
-
-
-import numpy as np
-from scipy import stats,special
-import scipy as sp
-import pylab as pb
-from GPy.util.plot import gpplot
-from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
-import gp_transformations
-
-
-class NoiseDistribution(object):
-    """
-    Likelihood class for doing Expectation Propagation
-
-    :param Y: observed output (Nx1 numpy.darray)
-
-    .. note:: the Y values allowed depend on the LikelihoodFunction used
-    """
-    def __init__(self,gp_link,analytical_mean=False,analytical_variance=False):
-        assert isinstance(gp_link,gp_transformations.GPTransformation), "gp_link is not a valid GPTransformation."
-        self.gp_link = gp_link
-        self.analytical_mean = analytical_mean
-        self.analytical_variance = analytical_variance
-        if self.analytical_mean:
-            self.moments_match = self._moments_match_analytical
-            self.predictive_mean = self._predictive_mean_analytical
-        else:
-            self.moments_match = self._moments_match_numerical
-            self.predictive_mean = self._predictive_mean_numerical
-        if self.analytical_variance:
-            self.predictive_variance = self._predictive_variance_analytical
-        else:
-            self.predictive_variance = self._predictive_variance_numerical
-
-    def _get_params(self):
-        return np.zeros(0)
-
-    def _get_param_names(self):
-        return []
-
-    def _set_params(self,p):
-        pass
-
-    def _gradients(self,partial):
-        return np.zeros(0)
-
-    def _preprocess_values(self,Y):
-        """
-        In case it is needed, this function assesses the output values or makes any pertinent transformation on them.
-
-        :param Y: observed output
-        :type Y: Nx1 numpy.darray
-
-        """
-        return Y
-
-    def _product(self,gp,obs,mu,sigma):
-        """
-        Product between the cavity distribution and a likelihood factor.
-
-        :param gp: latent variable
-        :param obs: observed output
-        :param mu: cavity distribution mean
-        :param sigma: cavity distribution standard deviation
-
-        """
-        return stats.norm.pdf(gp,loc=mu,scale=sigma) * self._mass(gp,obs)
-
-    def _nlog_product_scaled(self,gp,obs,mu,sigma):
-        """
-        Negative log-product between the cavity distribution and a likelihood factor.
-
-        .. note:: The constant term in the Gaussian distribution is ignored.
-
-        :param gp: latent variable
-        :param obs: observed output
-        :param mu: cavity distribution mean
-        :param sigma: cavity distribution standard deviation
-
-        """
-        return .5*((gp-mu)/sigma)**2 + self._nlog_mass(gp,obs)
-
-    def _dnlog_product_dgp(self,gp,obs,mu,sigma):
-        """
-        Derivative wrt the latent variable of the negative log-product between the cavity distribution and a likelihood factor.
-
-        :param gp: latent variable
-        :param obs: observed output
-        :param mu: cavity distribution mean
-        :param sigma: cavity distribution standard deviation
-
-        """
-        return (gp - mu)/sigma**2 + self._dnlog_mass_dgp(gp,obs)
-
-    def _d2nlog_product_dgp2(self,gp,obs,mu,sigma):
-        """
-        Second derivative wrt the latent variable of the negative log-product between the cavity distribution and a likelihood factor.
-
-        :param gp: latent variable
-        :param obs: observed output
-        :param mu: cavity distribution mean
-        :param sigma: cavity distribution standard deviation
-
-        """
-        return 1./sigma**2 + self._d2nlog_mass_dgp2(gp,obs)
-
-    def _product_mode(self,obs,mu,sigma):
-        """
-        Newton-CG method to find the mode of _product (cavity x likelihood factor).
-
-        :param obs: observed output
-        :param mu: cavity distribution mean
-        :param sigma: cavity distribution standard deviation
-
-        """
-        return sp.optimize.fmin_ncg(self._nlog_product_scaled,x0=mu,fprime=self._dnlog_product_dgp,fhess=self._d2nlog_product_dgp2,args=(obs,mu,sigma),disp=False)
-
-    def _moments_match_analytical(self,obs,tau,v):
-        """
-        If available, this function computes the moments analytically.
-        """
-        pass
-
-    def _moments_match_numerical(self,obs,tau,v):
-        """
-        Laplace approximation to calculate the moments.
-
-        :param obs: observed output
-        :param tau: cavity distribution 1st natural parameter (precision)
-        :param v: cavity distribution 2nd natural parameter (mu*precision)
-
-        """
-        mu = v/tau
-        mu_hat = self._product_mode(obs,mu,np.sqrt(1./tau))
-        sigma2_hat = 1./(tau + self._d2nlog_mass_dgp2(mu_hat,obs))
-        Z_hat = np.exp(-.5*tau*(mu_hat-mu)**2) * self._mass(mu_hat,obs)*np.sqrt(tau*sigma2_hat)
-        return Z_hat,mu_hat,sigma2_hat
-
-    def _nlog_conditional_mean_scaled(self,gp,mu,sigma):
-        """
-        Negative logarithm of the latent variable's predictive distribution times the output's mean given the latent variable.
-
-        :param gp: latent variable
-        :param mu: cavity distribution mean
-        :param sigma: cavity distribution standard deviation
-
-        .. note:: This function helps computing E(Y_star) = E(E(Y_star|f_star))
-
-        """
-        return .5*((gp - mu)/sigma)**2 - np.log(self._mean(gp))
-
-    def _dnlog_conditional_mean_dgp(self,gp,mu,sigma):
-        """
-        Derivative of _nlog_conditional_mean_scaled wrt the latent variable.
-
-        :param gp: latent variable
-        :param mu: cavity distribution mean
-        :param sigma: cavity distribution standard deviation
-
-        """
-        return (gp - mu)/sigma**2 - self._dmean_dgp(gp)/self._mean(gp)
-
-    def _d2nlog_conditional_mean_dgp2(self,gp,mu,sigma):
-        """
-        Second derivative of _nlog_conditional_mean_scaled wrt the latent variable.
-
-        :param gp: latent variable
-        :param mu: cavity distribution mean
-        :param sigma: cavity distribution standard deviation
-
-        """
-        return 1./sigma**2 - self._d2mean_dgp2(gp)/self._mean(gp) + (self._dmean_dgp(gp)/self._mean(gp))**2
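`_moments_match_numerical` above approximates the tilted distribution N(f|v/tau, 1/tau) * p(obs|f) by a Gaussian at its mode, and Z_hat is the standard Laplace estimate of its mass. A standalone sketch for a Poisson factor with a Log link (illustrative values; scipy's minimize_scalar stands in for the Newton-CG call):

import numpy as np
from scipy import stats, optimize
from scipy.integrate import quad

y, tau, v = 3, 1.5, 0.6            # illustrative observation and cavity parameters
mu = v / tau

nlog_mass = lambda f: np.exp(f) - y * f   # -log Poisson pmf with Log link, up to constants
d2nlog_mass = lambda f: np.exp(f)

h = lambda f: .5 * tau * (f - mu) ** 2 + nlog_mass(f)
f_hat = optimize.minimize_scalar(h).x
sigma2_hat = 1. / (tau + d2nlog_mass(f_hat))

# Laplace mass vs quadrature, mirroring the Z_hat expression above
mass = lambda f: stats.poisson.pmf(y, np.exp(f))
Z_lap = np.exp(-.5 * tau * (f_hat - mu) ** 2) * mass(f_hat) * np.sqrt(tau * sigma2_hat)
Z_num, _ = quad(lambda f: stats.norm.pdf(f, mu, np.sqrt(1. / tau)) * mass(f), -10., 10.)
print(Z_lap, Z_num)   # close, though Laplace is only an approximation here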
-    def _nlog_exp_conditional_variance_scaled(self,gp,mu,sigma):
-        """
-        Negative logarithm of the latent variable's predictive distribution times the output's variance given the latent variable.
-
-        :param gp: latent variable
-        :param mu: cavity distribution mean
-        :param sigma: cavity distribution standard deviation
-
-        .. note:: This function helps computing E(V(Y_star|f_star))
-
-        """
-        return .5*((gp - mu)/sigma)**2 - np.log(self._variance(gp))
-
-    def _dnlog_exp_conditional_variance_dgp(self,gp,mu,sigma):
-        """
-        Derivative of _nlog_exp_conditional_variance_scaled wrt the latent variable.
-
-        :param gp: latent variable
-        :param mu: cavity distribution mean
-        :param sigma: cavity distribution standard deviation
-
-        """
-        return (gp - mu)/sigma**2 - self._dvariance_dgp(gp)/self._variance(gp)
-
-    def _d2nlog_exp_conditional_variance_dgp2(self,gp,mu,sigma):
-        """
-        Second derivative of _nlog_exp_conditional_variance_scaled wrt the latent variable.
-
-        :param gp: latent variable
-        :param mu: cavity distribution mean
-        :param sigma: cavity distribution standard deviation
-
-        """
-        return 1./sigma**2 - self._d2variance_dgp2(gp)/self._variance(gp) + (self._dvariance_dgp(gp)/self._variance(gp))**2
-
-    def _nlog_exp_conditional_mean_sq_scaled(self,gp,mu,sigma):
-        """
-        Negative logarithm of the latent variable's predictive distribution times the output's squared mean given the latent variable.
-
-        :param gp: latent variable
-        :param mu: cavity distribution mean
-        :param sigma: cavity distribution standard deviation
-
-        .. note:: This function helps computing E( E(Y_star|f_star)**2 )
-
-        """
-        return .5*((gp - mu)/sigma)**2 - 2*np.log(self._mean(gp))
-
-    def _dnlog_exp_conditional_mean_sq_dgp(self,gp,mu,sigma):
-        """
-        Derivative of _nlog_exp_conditional_mean_sq_scaled wrt the latent variable.
-
-        :param gp: latent variable
-        :param mu: cavity distribution mean
-        :param sigma: cavity distribution standard deviation
-
-        """
-        return (gp - mu)/sigma**2 - 2*self._dmean_dgp(gp)/self._mean(gp)
-
-    def _d2nlog_exp_conditional_mean_sq_dgp2(self,gp,mu,sigma):
-        """
-        Second derivative of _nlog_exp_conditional_mean_sq_scaled wrt the latent variable.
-
-        :param gp: latent variable
-        :param mu: cavity distribution mean
-        :param sigma: cavity distribution standard deviation
-
-        """
-        return 1./sigma**2 - 2*( self._d2mean_dgp2(gp)/self._mean(gp) - (self._dmean_dgp(gp)/self._mean(gp))**2 )
-
-    def _predictive_mean_analytical(self,mu,sigma):
-        """
-        If available, this function computes the predictive mean analytically.
-        """
-        pass
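When no closed form exists, the predictive mean is E(Y_star) = E(E(Y_star|f_star)), an expectation of the transformed latent variable under its Gaussian predictive. For the Log link that expectation is lognormal, which makes a convenient sanity check for any numerical scheme (standalone sketch, illustrative values):

import numpy as np

np.random.seed(0)
mu, sigma = 0.4, 0.3                      # illustrative latent predictive moments
f = np.random.randn(200000) * sigma + mu

mc = np.exp(f).mean()                     # Monte-Carlo estimate of E(transf(f_star))
closed = np.exp(mu + .5 * sigma ** 2)     # lognormal mean
print(mc, closed)                         # agree to Monte-Carlo error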
- """ - pass - - def _predictive_mean_numerical(self,mu,sigma): - """ - Laplace approximation to the predictive mean: E(Y_star) = E( E(Y_star|f_star) ) - - :param mu: cavity distribution mean - :param sigma: cavity distribution standard deviation - - """ - maximum = sp.optimize.fmin_ncg(self._nlog_conditional_mean_scaled,x0=self._mean(mu),fprime=self._dnlog_conditional_mean_dgp,fhess=self._d2nlog_conditional_mean_dgp2,args=(mu,sigma),disp=False) - mean = np.exp(-self._nlog_conditional_mean_scaled(maximum,mu,sigma))/(np.sqrt(self._d2nlog_conditional_mean_dgp2(maximum,mu,sigma))*sigma) - """ - - pb.figure() - x = np.array([mu + step*sigma for step in np.linspace(-7,7,100)]) - f = np.array([np.exp(-self._nlog_conditional_mean_scaled(xi,mu,sigma))/np.sqrt(2*np.pi*sigma**2) for xi in x]) - pb.plot(x,f,'b-') - sigma2 = 1./self._d2nlog_conditional_mean_dgp2(maximum,mu,sigma) - f2 = np.exp(-.5*(x-maximum)**2/sigma2)/np.sqrt(2*np.pi*sigma2) - k = np.exp(-self._nlog_conditional_mean_scaled(maximum,mu,sigma))*np.sqrt(sigma2)/np.sqrt(sigma**2) - pb.plot(x,f2*mean,'r-') - pb.vlines(maximum,0,f.max()) - """ - return mean - - def _predictive_mean_sq(self,mu,sigma): - """ - Laplace approximation to the predictive mean squared: E(Y_star**2) = E( E(Y_star|f_star)**2 ) - - :param mu: cavity distribution mean - :param sigma: cavity distribution standard deviation - - """ - maximum = sp.optimize.fmin_ncg(self._nlog_exp_conditional_mean_sq_scaled,x0=self._mean(mu),fprime=self._dnlog_exp_conditional_mean_sq_dgp,fhess=self._d2nlog_exp_conditional_mean_sq_dgp2,args=(mu,sigma),disp=False) - mean_squared = np.exp(-self._nlog_exp_conditional_mean_sq_scaled(maximum,mu,sigma))/(np.sqrt(self._d2nlog_exp_conditional_mean_sq_dgp2(maximum,mu,sigma))*sigma) - return mean_squared - - def _predictive_variance_numerical(self,mu,sigma,predictive_mean=None): - """ - Laplace approximation to the predictive variance: V(Y_star) = E( V(Y_star|f_star) ) + V( E(Y_star|f_star) ) - - :param mu: cavity distribution mean - :param sigma: cavity distribution standard deviation - :predictive_mean: output's predictive mean, if None _predictive_mean function will be called. 
- - """ - # E( V(Y_star|f_star) ) - maximum = sp.optimize.fmin_ncg(self._nlog_exp_conditional_variance_scaled,x0=self._variance(mu),fprime=self._dnlog_exp_conditional_variance_dgp,fhess=self._d2nlog_exp_conditional_variance_dgp2,args=(mu,sigma),disp=False) - exp_var = np.exp(-self._nlog_exp_conditional_variance_scaled(maximum,mu,sigma))/(np.sqrt(self._d2nlog_exp_conditional_variance_dgp2(maximum,mu,sigma))*sigma) - - """ - pb.figure() - x = np.array([mu + step*sigma for step in np.linspace(-7,7,100)]) - f = np.array([np.exp(-self._nlog_exp_conditional_variance_scaled(xi,mu,sigma))/np.sqrt(2*np.pi*sigma**2) for xi in x]) - pb.plot(x,f,'b-') - sigma2 = 1./self._d2nlog_exp_conditional_variance_dgp2(maximum,mu,sigma) - f2 = np.exp(-.5*(x-maximum)**2/sigma2)/np.sqrt(2*np.pi*sigma2) - k = np.exp(-self._nlog_exp_conditional_variance_scaled(maximum,mu,sigma))*np.sqrt(sigma2)/np.sqrt(sigma**2) - pb.plot(x,f2*exp_var,'r--') - pb.vlines(maximum,0,f.max()) - """ - - #V( E(Y_star|f_star) ) = E( E(Y_star|f_star)**2 ) - E( E(Y_star|f_star)**2 ) - exp_exp2 = self._predictive_mean_sq(mu,sigma) - if predictive_mean is None: - predictive_mean = self.predictive_mean(mu,sigma) - var_exp = exp_exp2 - predictive_mean**2 - return exp_var + var_exp - - def _predictive_percentiles(self,p,mu,sigma): - """ - Percentiles of the predictive distribution - - :parm p: lower tail probability - :param mu: cavity distribution mean - :param sigma: cavity distribution standard deviation - :predictive_mean: output's predictive mean, if None _predictive_mean function will be called. - - """ - qf = stats.norm.ppf(p,mu,sigma) - return self.gp_link.transf(qf) - - def _nlog_joint_predictive_scaled(self,x,mu,sigma): - """ - Negative logarithm of the joint predictive distribution (latent variable and output). - - :param x: tuple (latent variable,output) - :param mu: latent variable's predictive mean - :param sigma: latent variable's predictive standard deviation - - """ - return self._nlog_product_scaled(x[0],x[1],mu,sigma) - - def _gradient_nlog_joint_predictive(self,x,mu,sigma): - """ - Gradient of _nlog_joint_predictive_scaled. - - :param x: tuple (latent variable,output) - :param mu: latent variable's predictive mean - :param sigma: latent variable's predictive standard deviation - - .. note: Only available when the output is continuous - - """ - assert not self.discrete, "Gradient not available for discrete outputs." - return np.array((self._dnlog_product_dgp(gp=x[0],obs=x[1],mu=mu,sigma=sigma),self._dnlog_mass_dobs(obs=x[1],gp=x[0]))) - - def _hessian_nlog_joint_predictive(self,x,mu,sigma): - """ - Hessian of _nlog_joint_predictive_scaled. - - :param x: tuple (latent variable,output) - :param mu: latent variable's predictive mean - :param sigma: latent variable's predictive standard deviation - - .. note: Only available when the output is continuous - - """ - assert not self.discrete, "Hessian not available for discrete outputs." - cross_derivative = self._d2nlog_mass_dcross(gp=x[0],obs=x[1]) - return np.array((self._d2nlog_product_dgp2(gp=x[0],obs=x[1],mu=mu,sigma=sigma),cross_derivative,cross_derivative,self._d2nlog_mass_dobs2(obs=x[1],gp=x[0]))).reshape(2,2) - - def _joint_predictive_mode(self,mu,sigma): - """ - Negative logarithm of the joint predictive distribution (latent variable and output). 
-    def _joint_predictive_mode(self,mu,sigma):
-        """
-        Mode of the joint predictive distribution (latent variable and output), found with Newton-CG.
-
-        :param mu: latent variable's predictive mean
-        :param sigma: latent variable's predictive standard deviation
-
-        """
-        return sp.optimize.fmin_ncg(self._nlog_joint_predictive_scaled,x0=(mu,self.gp_link.transf(mu)),fprime=self._gradient_nlog_joint_predictive,fhess=self._hessian_nlog_joint_predictive,args=(mu,sigma),disp=False)
-
-    def predictive_values(self,mu,var):
-        """
-        Compute mean, variance and confidence interval (percentiles 2.5 and 97.5) of the prediction.
-
-        :param mu: mean of the latent variable
-        :param var: variance of the latent variable
-
-        """
-        if isinstance(mu,float) or isinstance(mu,int):
-            mu = [mu]
-            var = [var]
-        pred_mean = []
-        pred_var = []
-        q1 = []
-        q3 = []
-        for m,s in zip(mu,np.sqrt(var)):
-            pred_mean.append(self.predictive_mean(m,s))
-            pred_var.append(self.predictive_variance(m,s,pred_mean[-1]))
-            q1.append(self._predictive_percentiles(.025,m,s))
-            q3.append(self._predictive_percentiles(.975,m,s))
-        pred_mean = np.vstack(pred_mean)
-        pred_var = np.vstack(pred_var)
-        q1 = np.vstack(q1)
-        q3 = np.vstack(q3)
-        return pred_mean, pred_var, q1, q3
-
-
-    def samples(self, gp):
-        """
-        Returns a set of samples of observations based on a given value of the latent variable.
-
-        :param gp: latent variable
-        """
-        pass
-
diff --git a/GPy/likelihoods/noise_models/poisson_noise.py b/GPy/likelihoods/noise_models/poisson_noise.py
deleted file mode 100644
index 33de84cd..00000000
--- a/GPy/likelihoods/noise_models/poisson_noise.py
+++ /dev/null
@@ -1,69 +0,0 @@
-# Copyright (c) 2012, 2013 Ricardo Andrade
-# Licensed under the BSD 3-clause license (see LICENSE.txt)
-
-
-import numpy as np
-from scipy import stats,special
-import scipy as sp
-from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
-import gp_transformations
-from noise_distributions import NoiseDistribution
-
-class Poisson(NoiseDistribution):
-    """
-    Poisson likelihood
-
-    .. math::
-        L(x) = \\exp(-\\lambda) * \\frac{\\lambda^{Y_i}}{Y_i!}
-
-    .. note:: Y is expected to take values in {0,1,2,...}
-    """
-    def __init__(self,gp_link=None,analytical_mean=False,analytical_variance=False):
-        super(Poisson, self).__init__(gp_link,analytical_mean,analytical_variance)
-
-    def _preprocess_values(self,Y): #TODO
-        return Y
-
-    def _mass(self,gp,obs):
-        """
-        Mass (or density) function
-        """
-        return stats.poisson.pmf(obs,self.gp_link.transf(gp))
-
-    def _nlog_mass(self,gp,obs):
-        """
-        Negative logarithm of the mass function
-        """
-        return self.gp_link.transf(gp) - obs * np.log(self.gp_link.transf(gp)) + np.log(special.gamma(obs+1))
-
-    def _dnlog_mass_dgp(self,gp,obs):
-        return self.gp_link.dtransf_df(gp) * (1. - obs/self.gp_link.transf(gp))
-
-    def _d2nlog_mass_dgp2(self,gp,obs):
-        d2_df = self.gp_link.d2transf_df2(gp)
-        transf = self.gp_link.transf(gp)
-        return obs * ((self.gp_link.dtransf_df(gp)/transf)**2 - d2_df/transf) + d2_df
-
-    def _mean(self,gp):
-        """
-        Mean function
-        """
-        return self.gp_link.transf(gp)
-
-    def _dmean_dgp(self,gp):
-        return self.gp_link.dtransf_df(gp)
-
-    def _d2mean_dgp2(self,gp):
-        return self.gp_link.d2transf_df2(gp)
-
-    def _variance(self,gp):
-        """
-        Variance function
-        """
-        return self.gp_link.transf(gp)
-
-    def _dvariance_dgp(self,gp):
-        return self.gp_link.dtransf_df(gp)
-
-    def _d2variance_dgp2(self,gp):
-        return self.gp_link.d2transf_df2(gp)
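The Poisson derivatives above can be verified the same way as the other noise models. A standalone gradient check with a Log link (local names, not the GPy API; gammaln replaces log(gamma(.)) purely for numerical stability):

import numpy as np
from scipy import special

transf = dtransf_df = np.exp                       # Log link: both are exp(f)

def nlog_mass(f, y):                               # mirrors _nlog_mass above
    lam = transf(f)
    return lam - y * np.log(lam) + special.gammaln(y + 1)

def dnlog_mass_df(f, y):                           # mirrors _dnlog_mass_dgp above
    return dtransf_df(f) * (1. - y / transf(f))

f0, y, eps = 0.5, 4, 1e-6
numeric = (nlog_mass(f0 + eps, y) - nlog_mass(f0 - eps, y)) / (2 * eps)
print(abs(numeric - dnlog_mass_df(f0, y)))         # ~1e-9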