various fixes in likelihoods, esp studentT and plotting

This commit is contained in:
James Hensman 2014-03-13 15:35:54 +00:00
parent cc96f5b3d5
commit b7508ce12b
5 changed files with 50 additions and 56 deletions


@@ -123,7 +123,7 @@ class GP(Model):
         mean, var = self.likelihood.predictive_values(mu, var, full_cov, Y_metadata)
         return mean, var
 
-    def predict_quantiles(self, X, quantiles=(0.025, 0.975), Y_metadata=None):
+    def predict_quantiles(self, X, quantiles=(2.5, 97.5), Y_metadata=None):
         m, v = self._raw_predict(X, full_cov=False)
         return self.likelihood.predictive_quantiles(m, v, quantiles, Y_metadata)
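A note on the quantile change: the new predictive_quantiles (added to the likelihood class below) computes quantiles with np.percentile, which expects percentages in [0, 100] rather than probabilities in [0, 1]. A minimal sketch, separate from the diff, of what the old default would have silently asked for:

```python
# np.percentile takes percent units: (2.5, 97.5) is the central 95% band.
import numpy as np

samples = np.random.randn(100000)
lo, hi = np.percentile(samples, [2.5, 97.5])   # ~(-1.96, +1.96)
bad = np.percentile(samples, [0.025, 0.975])   # ~(-3.5, -2.3): the far left tail
print(lo, hi, bad)
```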


@@ -93,7 +93,6 @@ class Bernoulli(Likelihood):
             return 0.
         else:
             return np.nan
-            #raise NotImplementedError
 
     def pdf_link(self, link_f, y, extra_data=None):
         """


@@ -58,6 +58,18 @@ class Likelihood(Parameterized):
         """
         return Y
 
+    def conditional_mean(self, gp):
+        """
+        The mean of the random variable conditioned on one value of the GP
+        """
+        raise NotImplementedError
+
+    def conditional_variance(self, gp):
+        """
+        The variance of the random variable conditioned on one value of the GP
+        """
+        raise NotImplementedError
+
     def log_predictive_density(self, y_test, mu_star, var_star):
         """
         Calculation of the log predictive density
@@ -120,7 +132,7 @@ class Likelihood(Parameterized):
         return z, mean, variance
 
-    def _predictive_mean(self,mu,variance):
+    def predictive_mean(self, mu, variance, Y_metadata=None):
         """
         Quadrature calculation of the predictive mean: E(Y_star|Y) = E( E(Y_star|f_star, Y) )
@@ -128,14 +140,15 @@ class Likelihood(Parameterized):
         :param sigma: standard deviation of posterior
         """
+        #conditional_mean: the expected value of y given some f, under this likelihood
         def int_mean(f,m,v):
-            return self._mean(f)*np.exp(-(0.5/v)*np.square(f - m))
+            return self.conditional_mean(f)*np.exp(-(0.5/v)*np.square(f - m))
         scaled_mean = [quad(int_mean, -np.inf, np.inf,args=(mj,s2j))[0] for mj,s2j in zip(mu,variance)]
         mean = np.array(scaled_mean)[:,None] / np.sqrt(2*np.pi*(variance))
         return mean
 
-    def _predictive_variance(self, mu,variance, predictive_mean=None):
+    def predictive_variance(self, mu, variance, predictive_mean=None, Y_metadata=None):
         """
         Numerical approximation to the predictive variance: V(Y_star)
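The rename from _predictive_mean to predictive_mean makes the hook explicit: each likelihood supplies conditional_mean(f) = E[Y*|f], and the Gaussian posterior over f is integrated out with scipy's quad. A self-contained sketch of the same integral, assuming a Poisson likelihood with an exp link so there is a closed form to check against:

```python
# E[Y*] = int E[Y*|f] N(f; m, v) df.  For Y|f ~ Poisson(exp(f)) this is the
# lognormal mean exp(m + v/2), which the quadrature should reproduce.
import numpy as np
from scipy.integrate import quad

m, v = 0.3, 0.5   # posterior mean/variance of f at one test input

def int_mean(f):
    # conditional mean times the unnormalised Gaussian, as in the code above
    return np.exp(f) * np.exp(-(0.5 / v) * np.square(f - m))

mean = quad(int_mean, -np.inf, np.inf)[0] / np.sqrt(2 * np.pi * v)
print(mean, np.exp(m + v / 2.))   # both ~1.733
```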
@@ -152,7 +165,7 @@ class Likelihood(Parameterized):
         # E( V(Y_star|f_star) )
         def int_var(f,m,v):
-            return self._variance(f)*np.exp(-(0.5/v)*np.square(f - m))
+            return self.conditional_variance(f)*np.exp(-(0.5/v)*np.square(f - m))
         scaled_exp_variance = [quad(int_var, -np.inf, np.inf,args=(mj,s2j))[0] for mj,s2j in zip(mu,variance)]
         exp_var = np.array(scaled_exp_variance)[:,None] / normalizer
@@ -165,13 +178,14 @@ class Likelihood(Parameterized):
         #E( E(Y_star|f_star)**2 )
         def int_pred_mean_sq(f,m,v,predictive_mean_sq):
-            return self._mean(f)**2*np.exp(-(0.5/v)*np.square(f - m))
+            return self.conditional_mean(f)**2*np.exp(-(0.5/v)*np.square(f - m))
         scaled_exp_exp2 = [quad(int_pred_mean_sq, -np.inf, np.inf,args=(mj,s2j,pm2j))[0] for mj,s2j,pm2j in zip(mu,variance,predictive_mean_sq)]
         exp_exp2 = np.array(scaled_exp_exp2)[:,None] / normalizer
         var_exp = exp_exp2 - predictive_mean_sq
 
-        # V(Y_star) = E( V(Y_star|f_star) ) + V( E(Y_star|f_star) )
+        # V(Y_star) = E[ V(Y_star|f_star) ] + V[ E(Y_star|f_star) ]
+        #           = E[ V(Y_star|f_star) ] + E[ E(Y_star|f_star)**2 ] - E[ E(Y_star|f_star) ]**2
         return exp_var + var_exp
 
     def pdf_link(self, link_f, y, extra_data=None):
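The corrected comment is the law of total variance, which is exactly what the code assembles: exp_var is E[V(Y*|f*)] and var_exp is E[E(Y*|f*)**2] - E[E(Y*|f*)]**2. A quick Monte Carlo sanity check of the identity, again assuming a Poisson likelihood for concreteness:

```python
# For f ~ N(m, v) and Y|f ~ Poisson(lam) with lam = exp(f), the mean and
# variance given f are both lam, so the identity is easy to verify by sampling.
import numpy as np

rng = np.random.RandomState(0)
m, v = 0.3, 0.5
lam = np.exp(rng.randn(200000) * np.sqrt(v) + m)

exp_var = lam.mean()                              # E[ V(Y|f) ]
var_exp = (lam**2).mean() - lam.mean()**2         # V( E[Y|f] )
print(exp_var + var_exp, rng.poisson(lam).var())  # the two should agree closely
```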
@@ -373,6 +387,15 @@ class Likelihood(Parameterized):
         return pred_mean, pred_var
 
+    def predictive_quantiles(self, mu, var, quantiles, Y_metadata):
+        #compute the quantiles by sampling
+        N_samp = 1000
+        s = np.random.randn(mu.shape[0], N_samp)*np.sqrt(var) + mu
+        ss_f = s.flatten()
+        ss_y = self.samples(ss_f)
+        ss_y = ss_y.reshape(mu.shape[0], N_samp)
+        return [np.percentile(ss_y, q, axis=1)[:,None] for q in quantiles]
+
     def samples(self, gp):
         """
@@ -381,23 +404,3 @@ class Likelihood(Parameterized):
         :param gp: latent variable
         """
         raise NotImplementedError
-        if sampling:
-            #Get gp_samples f* using posterior mean and variance
-            if not full_cov:
-                gp_samples = np.random.multivariate_normal(mu.flatten(), np.diag(var.flatten()),
-                                                           size=num_samples).T
-            else:
-                gp_samples = np.random.multivariate_normal(mu.flatten(), var,
-                                                           size=num_samples).T
-            #Push gp samples (f*) through likelihood to give p(y*|f*)
-            samples = self.samples(gp_samples)
-            axis=-1
-            #Calculate mean, variance and precentiles from samples
-            print "WARNING: Using sampling to calculate mean, variance and predictive quantiles."
-            pred_mean = np.mean(samples, axis=axis)[:,None]
-            pred_var = np.var(samples, axis=axis)[:,None]
-            q1 = np.percentile(samples, 2.5, axis=axis)[:,None]
-            q3 = np.percentile(samples, 97.5, axis=axis)[:,None]


@@ -131,15 +131,15 @@ class Poisson(Likelihood):
         d3lik_dlink3 = 2*y/(link_f)**3
         return d3lik_dlink3
 
-    def _mean(self,gp):
+    def conditional_mean(self,gp):
         """
-        Mass (or density) function
+        The mean of the random variable conditioned on one value of the GP
         """
         return self.gp_link.transf(gp)
 
-    def _variance(self,gp):
+    def conditional_variance(self,gp):
         """
-        Mass (or density) function
+        The variance of the random variable conditioned on one value of the GP
         """
         return self.gp_link.transf(gp)
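For the Poisson the two new hooks return the same thing, because a Poisson's conditional mean and variance are both the rate lam = gp_link.transf(f). A quick check, separate from the diff:

```python
# Mean and variance of Poisson(lam) both equal lam.
import numpy as np

y = np.random.RandomState(2).poisson(lam=3.7, size=200000)
print(y.mean(), y.var())   # both ~3.7
```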


@@ -9,6 +9,7 @@ from scipy import stats, integrate
 from scipy.special import gammaln, gamma
 from likelihood import Likelihood
 from ..core.parameterization import Param
+from ..core.parameterization.transformations import Logexp
 
 class StudentT(Likelihood):
     """
@@ -28,7 +29,7 @@ class StudentT(Likelihood):
         self.sigma2 = Param('t_noise', float(sigma2))
         self.v = Param('deg_free', float(deg_free))
-        self.add_parameter(self.sigma2)
+        self.add_parameter(self.sigma2, Logexp())
         self.add_parameter(self.v)
         self.v.constrain_fixed()
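Adding Logexp() here is the substantive Student-t fix: without a positivity constraint, gradient steps can drive t_noise negative. Assuming Logexp is the softplus reparameterisation from the transformations module imported above, the optimiser works on an unconstrained value while the model always sees a positive variance. A sketch of the mapping:

```python
# theta = log(1 + exp(phi)) > 0 for any real phi, with a smooth inverse.
import numpy as np

def logexp(phi):           # unconstrained -> positive parameter
    return np.log(1. + np.exp(phi))

def logexp_inv(theta):     # positive parameter -> unconstrained
    return np.log(np.exp(theta) - 1.)

print(logexp(-5.))               # ~0.0067: small but strictly positive
print(logexp(logexp_inv(2.)))    # round-trips to 2.0
```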
@@ -244,32 +245,23 @@ class StudentT(Likelihood):
         d2logpdf_dlink2_dv = np.zeros_like(d2logpdf_dlink2_dvar) #FIXME: Not done yet
         return np.hstack((d2logpdf_dlink2_dvar, d2logpdf_dlink2_dv))
 
-    def predictive_variance(self, mu, sigma, predictive_mean=None):
-        """
-        Compute predictive variance of student_t*normal p(y*|f*)p(f*)
-
-        Need to find what the variance is at the latent points for a student t*normal p(y*|f*)p(f*)
-        (((g((v+1)/2))/(g(v/2)*s*sqrt(v*pi)))*(1+(1/v)*((y-f)/s)^2)^(-(v+1)/2))
-        *((1/(s*sqrt(2*pi)))*exp(-(1/(2*(s^2)))*((y-f)^2)))
-        """
-        #FIXME: Not correct
-        #We want the variance around test points y which comes from int p(y*|f*)p(f*) df*
-        #Var(y*) = Var(E[y*|f*]) + E[Var(y*|f*)]
-        #Since we are given f* (mu) which is our mean (expected) value of y*|f* then the variance is the variance around this
-        #Which was also given to us as (var)
-        #We also need to know the expected variance of y* around samples f*, this is the variance of the student t distribution
-        #However the variance of the student t distribution is not dependent on f, only on sigma and the degrees of freedom
-        true_var = 1/(1/sigma**2 + 1/self.variance)
-        return true_var
-
-    def predictive_mean(self, mu, sigma):
+    def predictive_mean(self, mu, sigma, Y_metadata=None):
         """
         Compute mean of the prediction
         """
-        #FIXME: Not correct
-        return mu
+        return self.gp_link.transf(mu) # only true if the link is monotonic, which it is
+
+    def predictive_variance(self, mu, variance, predictive_mean=None, Y_metadata=None):
+        if self.deg_free < 2.:
+            return np.empty(mu.shape)*np.nan # not defined for small degrees of freedom
+        else:
+            return super(StudentT, self).predictive_variance(mu, variance, predictive_mean, Y_metadata)
+
+    def conditional_mean(self, gp):
+        return self.gp_link.transf(gp)
+
+    def conditional_variance(self, gp):
+        return self.deg_free/(self.deg_free - 2.)
 
     def samples(self, gp):
         """