Added predicted values for student t, works well

This commit is contained in:
Alan Saul 2013-04-02 12:33:01 +01:00
parent 15d5c2f22d
commit ffc168c1d2
2 changed files with 53 additions and 36 deletions

View file

@ -18,7 +18,7 @@ def student_t_approx():
#Y = Y/Y.max()
#Yc[10] += 100
Yc[10] += 100
Yc[25] += 10
Yc[23] += 10
Yc[24] += 10
@ -52,51 +52,30 @@ def student_t_approx():
#A GP should completely break down due to the points as they get a lot of weight
# create simple GP model
m = GPy.models.GP_regression(X, Y, kernel=kernel1)
## optimize
# optimize
m.ensure_default_constraints()
#m.unconstrain('noise')
#m.constrain_fixed('noise', 0.1)
m.optimize()
# plot
plt.subplot(211)
m.plot()
print m
##Corrupt
#Corrupt
print "Corrupt Gaussian"
m = GPy.models.GP_regression(X, Yc, kernel=kernel2)
m.ensure_default_constraints()
#m.unconstrain('noise')
#m.constrain_fixed('noise', 0.1)
m.optimize()
plt.subplot(212)
m.plot()
print m
##with a student t distribution, since it has heavy tails it should work well
##likelihood_function = student_t(deg_free, sigma=real_var)
##lap = Laplace(Y, likelihood_function)
##cov = kernel.K(X)
##lap.fit_full(cov)
##test_range = np.arange(0, 10, 0.1)
##plt.plot(test_range, t_rv.pdf(test_range))
##for i in xrange(X.shape[0]):
##mode = lap.f_hat[i]
##covariance = lap.hess_hat_i[i,i]
##scaling = np.exp(lap.ln_z_hat)
##normalised_approx = norm(loc=mode, scale=covariance)
##print "Normal with mode %f, and variance %f" % (mode, covariance)
##plt.plot(test_range, scaling*normalised_approx.pdf(test_range))
##plt.show()
plt.figure(2)
plt.suptitle('Student-t likelihood')
edited_real_sd = real_sd
# Likelihood object
t_distribution = student_t(deg_free, sigma=edited_real_sd)
stu_t_likelihood = Laplace(Yc, t_distribution)
stu_t_likelihood = Laplace(Y, t_distribution)
print "Clean student t"
m = GPy.models.GP(X, stu_t_likelihood, kernel3)
@ -107,7 +86,7 @@ def student_t_approx():
print(m)
# plot
plt.subplot(211)
m.plot_f()
m.plot()
plt.ylim(-2.5,2.5)
#import ipdb; ipdb.set_trace() ### XXX BREAKPOINT
@ -124,6 +103,23 @@ def student_t_approx():
plt.ylim(-2.5,2.5)
import ipdb; ipdb.set_trace() ### XXX BREAKPOINT
###with a student t distribution, since it has heavy tails it should work well
###likelihood_function = student_t(deg_free, sigma=real_var)
###lap = Laplace(Y, likelihood_function)
###cov = kernel.K(X)
###lap.fit_full(cov)
###test_range = np.arange(0, 10, 0.1)
###plt.plot(test_range, t_rv.pdf(test_range))
###for i in xrange(X.shape[0]):
###mode = lap.f_hat[i]
###covariance = lap.hess_hat_i[i,i]
###scaling = np.exp(lap.ln_z_hat)
###normalised_approx = norm(loc=mode, scale=covariance)
###print "Normal with mode %f, and variance %f" % (mode, covariance)
###plt.plot(test_range, scaling*normalised_approx.pdf(test_range))
###plt.show()
return m

View file

@ -23,6 +23,10 @@ class student_t(likelihood_function):
#FIXME: This should be in the superclass
self.log_concave = False
@property
def variance(self):
    """
    Variance of the Student-t noise distribution.

    For degrees of freedom v > 2 this is (v / (v - 2)) * sigma^2. It depends
    only on self.v and self.sigma, not on the latent function value.

    The closed form is only valid for v > 2: for 1 < v <= 2 the variance of
    a Student-t is infinite and for v <= 1 it is undefined, so inf / NaN are
    returned instead of a negative number or a ZeroDivisionError.
    """
    dof = float(self.v)
    if dof <= 2.0:
        # No finite variance exists in this regime; do not evaluate v/(v-2),
        # which would be negative (v < 2) or divide by zero (v == 2).
        return float("inf") if dof > 1.0 else float("nan")
    return (dof / (dof - 2.0)) * (self.sigma ** 2)
def link_function(self, y, f):
"""link_function $\ln p(y|f)$
$$\ln p(y_{i}|f_{i}) = \ln \Gamma(\frac{v+1}{2}) - \ln \Gamma(\frac{v}{2})\sqrt{v \pi}\sigma - \frac{v+1}{2}\ln (1 + \frac{1}{v}\left(\frac{y_{i} - f_{i}}{\sigma}\right)^2$$
@ -79,14 +83,32 @@ class student_t(likelihood_function):
def predictive_values(self, mu, var):
    """
    Compute the predictive mean and an approximate central 95% interval
    (percentiles 2.5 and 97.5) of the prediction y*.

    The predictive density is int p(y*|f*)p(f*) df*, i.e. a student t*normal:
    (((g((v+1)/2))/(g(v/2)*s*sqrt(v*pi)))*(1+(1/v)*((y-f)/s)^2)^(-(v+1)/2))
    *((1/(s*sqrt(2*pi)))*exp(-(1/(2*(s^2)))*((y-f)^2)))

    :param mu: mean of the latent function f* at the test points
    :param var: variance of the latent function f* at the test points
    :returns: tuple (mu, np.nan*mu, p_025, p_975); the second entry is a
              NaN placeholder with the same shape as mu
    """
    # Law of total variance:
    #   Var(y*) = Var(E[y*|f*]) + E[Var(y*|f*)]
    # E[y*|f*] is f*, whose variance is the supplied `var`.
    # Var(y*|f*) is the variance of the student t distribution, which does not
    # depend on f (only on sigma and the degrees of freedom), so its
    # expectation is simply self.variance.
    true_var = var + self.variance

    # FIXME: Hack, just pretend p(y*|f*)p(f*) is a gaussian and use the
    # gaussian's percentiles. The interval is mean +/- 2 standard deviations,
    # so the variance must be square-rooted first.
    two_sd = 2. * np.sqrt(true_var)
    p_025 = mu - two_sd
    p_975 = mu + two_sd

    return mu, np.nan*mu, p_025, p_975
def sample_predicted_values(self, mu, var):
""" Experimental sample approches and numerical integration """
#p_025 = stats.t.ppf(.025, mu)
#p_975 = stats.t.ppf(.975, mu)
@ -134,14 +156,13 @@ class student_t(likelihood_function):
def t_gauss_int(mu, var):
print "Mu: ", mu
print "var: ", var
result = integrate.quad(t_gaussian, -np.inf, 0.975, args=(mu, var))
result = integrate.quad(t_gaussian, 0.025, 0.975, args=(mu, var))
print "Result: ", result
return result[0]
vec_t_gauss_int = np.vectorize(t_gauss_int)
p_025 = vec_t_gauss_int(mu, var)
p_975 = vec_t_gauss_int(mu, var)
p = vec_t_gauss_int(mu, var)
p_025 = mu - p
p_975 = mu + p
import ipdb; ipdb.set_trace() ### XXX BREAKPOINT
return mu, np.nan*mu, p_025, p_975