Added sampling for predictive quantiles and also mean and variance

where necessary
This commit is contained in:
Alan Saul 2013-10-28 15:22:06 +00:00
parent 5a924ff5cb
commit 336f8e11c4
4 changed files with 53 additions and 39 deletions

View file

@ -61,6 +61,7 @@ def toy_linear_1d_classification(seed=default_seed):
#m.update_likelihood_approximation()
# Parameters optimization:
#m.optimize()
#m.update_likelihood_approximation()
m.pseudo_EM()
# Plot

View file

@ -272,11 +272,10 @@ def toy_rbf_1d_50(max_iters=100):
def toy_poisson_rbf_1d(optimizer='bfgs', max_nb_eval_optim=100):
"""Run a simple demonstration of a standard Gaussian process fitting it to data sampled from an RBF covariance."""
X = np.linspace(0,10)[:, None]
F = np.round(X*3-4)
F = np.where(F > 0, F, 0)
eps = np.random.randint(0,4, F.shape[0])[:, None]
Y = F + eps
x_len = 400
X = np.linspace(0, 10, x_len)[:, None]
f_true = np.random.multivariate_normal(np.zeros(x_len), GPy.kern.rbf(1).K(X))
Y = np.array([np.random.poisson(np.exp(f)) for f in f_true])[:,None]
noise_model = GPy.likelihoods.poisson()
likelihood = GPy.likelihoods.EP(Y,noise_model)
@ -293,11 +292,10 @@ def toy_poisson_rbf_1d(optimizer='bfgs', max_nb_eval_optim=100):
def toy_poisson_rbf_1d_laplace(optimizer='bfgs', max_nb_eval_optim=100):
"""Run a simple demonstration of a standard Gaussian process fitting it to data sampled from an RBF covariance."""
X = np.linspace(0,10)[:, None]
F = np.round(X*3-4)
F = np.where(F > 0, F, 0)
eps = np.random.randint(0,4, F.shape[0])[:, None]
Y = F + eps
x_len = 30
X = np.linspace(0, 10, x_len)[:, None]
f_true = np.random.multivariate_normal(np.zeros(x_len), GPy.kern.rbf(1).K(X))
Y = np.array([np.random.poisson(np.exp(f)) for f in f_true])[:,None]
noise_model = GPy.likelihoods.poisson()
likelihood = GPy.likelihoods.Laplace(Y,noise_model)
@ -309,6 +307,8 @@ def toy_poisson_rbf_1d_laplace(optimizer='bfgs', max_nb_eval_optim=100):
m.optimize(optimizer, max_f_eval=max_nb_eval_optim)
# plot
m.plot()
# plot the real underlying rate function
pb.plot(X, np.exp(f_true), '--k', linewidth=2)
print(m)
return m

View file

@ -1,4 +1,4 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Copyright (c) 2013, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
#
#Parts of this file were influenced by the Matlab GPML framework written by

View file

@ -150,6 +150,8 @@ class NoiseDistribution(object):
:param sigma: standard deviation of posterior
"""
#FIXME: Quadrature does not work!
raise NotImplementedError
sigma2 = sigma**2
#Compute first moment
def int_mean(f):
@ -193,19 +195,6 @@ class NoiseDistribution(object):
# V(Y_star | f_star) = E( V(Y_star|f_star) ) + V( E(Y_star|f_star) )
return exp_var + var_exp
def _predictive_percentiles(self,p,mu,sigma):
"""
Percentiles of the predictive distribution
:parm p: lower tail probability
:param mu: cavity distribution mean
:param sigma: cavity distribution standard deviation
:predictive_mean: output's predictive mean, if None _predictive_mean function will be called.
"""
qf = stats.norm.ppf(p,mu,sigma)
return self.gp_link.transf(qf)
def pdf_link(self, link_f, y, extra_data=None):
raise NotImplementedError
@ -386,26 +375,50 @@ class NoiseDistribution(object):
assert d2logpdf_df2_dtheta.shape[1] == len(self._get_param_names())
return dlogpdf_dtheta, dlogpdf_df_dtheta, d2logpdf_df2_dtheta
def predictive_values(self,mu,var):
def predictive_values(self, mu, var, full_cov=False, num_samples=5000,
sampling=False):
"""
Compute mean, variance and conficence interval (percentiles 5 and 95) of the prediction.
:param mu: mean of the latent variable, f
:param var: variance of the latent variable, f
:param mu: mean of the latent variable, f, of posterior
:param var: variance of the latent variable, f, of posterior
:param full_cov: whether to use the full covariance or just the diagonal
:type full_cov: Boolean
:param num_samples: number of samples to use in computing quantiles and
possibly mean variance
:type num_samples: integer
:param sampling: Whether to use samples for mean and variances anyway
:type sampling: Boolean
"""
if isinstance(mu,float) or isinstance(mu,int):
mu = [mu]
var = [var]
pred_mean = []
pred_var = []
q1 = []
q3 = []
for m,s in zip(mu,np.sqrt(var)):
pred_mean.append(self.predictive_mean(m,s))
pred_var.append(self.predictive_variance(m,s,pred_mean[-1]))
q1.append(self._predictive_percentiles(.025,m,s))
q3.append(self._predictive_percentiles(.975,m,s))
#Get gp_samples f* using posterior mean and variance
if not full_cov:
gp_samples = np.random.multivariate_normal(mu.flatten(), np.diag(var.flatten()),
size=num_samples).T
else:
gp_samples = np.random.multivariate_normal(mu.flatten(), var,
size=num_samples).T
#Push gp samples (f*) through likelihood to give p(y*|f*)
samples = self.samples(gp_samples)
axis=-1
if self.analytical_mean and not sampling:
pred_mean = self.predictive_mean(mu, np.sqrt(var))
else:
pred_mean = np.mean(samples, axis=axis)
if self.analytical_variance and not sampling:
pred_var = self.predictive_variance(mu, np.sqrt(var), pred_mean)
else:
pred_var = np.var(samples, axis=axis)
#Calculate quantiles from samples
q1 = np.percentile(samples, 2.5, axis=axis)
q3 = np.percentile(samples, 97.5, axis=axis)
print "WARNING: Using sampling to calculate predictive quantiles"
pred_mean = np.vstack(pred_mean)
pred_var = np.vstack(pred_var)
q1 = np.vstack(q1)