mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-05-02 00:02:38 +02:00
Added predicted values for student t, works well
This commit is contained in:
parent
15d5c2f22d
commit
ffc168c1d2
2 changed files with 53 additions and 36 deletions
|
|
@ -18,7 +18,7 @@ def student_t_approx():
|
|||
|
||||
#Y = Y/Y.max()
|
||||
|
||||
#Yc[10] += 100
|
||||
Yc[10] += 100
|
||||
Yc[25] += 10
|
||||
Yc[23] += 10
|
||||
Yc[24] += 10
|
||||
|
|
@ -52,51 +52,30 @@ def student_t_approx():
|
|||
#A GP should completely break down due to the points as they get a lot of weight
|
||||
# create simple GP model
|
||||
m = GPy.models.GP_regression(X, Y, kernel=kernel1)
|
||||
## optimize
|
||||
# optimize
|
||||
m.ensure_default_constraints()
|
||||
#m.unconstrain('noise')
|
||||
#m.constrain_fixed('noise', 0.1)
|
||||
m.optimize()
|
||||
# plot
|
||||
plt.subplot(211)
|
||||
m.plot()
|
||||
print m
|
||||
|
||||
##Corrupt
|
||||
#Corrupt
|
||||
print "Corrupt Gaussian"
|
||||
m = GPy.models.GP_regression(X, Yc, kernel=kernel2)
|
||||
m.ensure_default_constraints()
|
||||
#m.unconstrain('noise')
|
||||
#m.constrain_fixed('noise', 0.1)
|
||||
m.optimize()
|
||||
plt.subplot(212)
|
||||
m.plot()
|
||||
print m
|
||||
|
||||
##with a student t distribution, since it has heavy tails it should work well
|
||||
##likelihood_function = student_t(deg_free, sigma=real_var)
|
||||
##lap = Laplace(Y, likelihood_function)
|
||||
##cov = kernel.K(X)
|
||||
##lap.fit_full(cov)
|
||||
|
||||
##test_range = np.arange(0, 10, 0.1)
|
||||
##plt.plot(test_range, t_rv.pdf(test_range))
|
||||
##for i in xrange(X.shape[0]):
|
||||
##mode = lap.f_hat[i]
|
||||
##covariance = lap.hess_hat_i[i,i]
|
||||
##scaling = np.exp(lap.ln_z_hat)
|
||||
##normalised_approx = norm(loc=mode, scale=covariance)
|
||||
##print "Normal with mode %f, and variance %f" % (mode, covariance)
|
||||
##plt.plot(test_range, scaling*normalised_approx.pdf(test_range))
|
||||
##plt.show()
|
||||
|
||||
plt.figure(2)
|
||||
plt.suptitle('Student-t likelihood')
|
||||
edited_real_sd = real_sd
|
||||
|
||||
# Likelihood object
|
||||
t_distribution = student_t(deg_free, sigma=edited_real_sd)
|
||||
stu_t_likelihood = Laplace(Yc, t_distribution)
|
||||
stu_t_likelihood = Laplace(Y, t_distribution)
|
||||
|
||||
print "Clean student t"
|
||||
m = GPy.models.GP(X, stu_t_likelihood, kernel3)
|
||||
|
|
@ -107,7 +86,7 @@ def student_t_approx():
|
|||
print(m)
|
||||
# plot
|
||||
plt.subplot(211)
|
||||
m.plot_f()
|
||||
m.plot()
|
||||
plt.ylim(-2.5,2.5)
|
||||
#import ipdb; ipdb.set_trace() ### XXX BREAKPOINT
|
||||
|
||||
|
|
@ -124,6 +103,23 @@ def student_t_approx():
|
|||
plt.ylim(-2.5,2.5)
|
||||
import ipdb; ipdb.set_trace() ### XXX BREAKPOINT
|
||||
|
||||
###with a student t distribution, since it has heavy tails it should work well
|
||||
###likelihood_function = student_t(deg_free, sigma=real_var)
|
||||
###lap = Laplace(Y, likelihood_function)
|
||||
###cov = kernel.K(X)
|
||||
###lap.fit_full(cov)
|
||||
|
||||
###test_range = np.arange(0, 10, 0.1)
|
||||
###plt.plot(test_range, t_rv.pdf(test_range))
|
||||
###for i in xrange(X.shape[0]):
|
||||
###mode = lap.f_hat[i]
|
||||
###covariance = lap.hess_hat_i[i,i]
|
||||
###scaling = np.exp(lap.ln_z_hat)
|
||||
###normalised_approx = norm(loc=mode, scale=covariance)
|
||||
###print "Normal with mode %f, and variance %f" % (mode, covariance)
|
||||
###plt.plot(test_range, scaling*normalised_approx.pdf(test_range))
|
||||
###plt.show()
|
||||
|
||||
return m
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -23,6 +23,10 @@ class student_t(likelihood_function):
|
|||
#FIXME: This should be in the superclass
|
||||
self.log_concave = False
|
||||
|
||||
@property
def variance(self):
    """
    Variance of the Student-t noise distribution.

    Computed from the degrees of freedom ``self.v`` and the scale
    ``self.sigma``.  The closed form ``v / (v - 2) * sigma**2`` is only
    valid for ``v > 2``; outside that range the previous implementation
    either divided by zero (``v == 2``) or returned a negative number
    (``v < 2``).

    :returns: ``v / (v - 2) * sigma**2`` when ``v > 2``;
              ``inf`` when ``1 < v <= 2`` (the variance is infinite);
              ``nan`` when ``v <= 1`` (the variance is undefined).
    """
    # NOTE(review): assumes self.v is a scalar degrees-of-freedom value -- confirm.
    dof = float(self.v)
    if dof > 2.0:
        return (dof / (dof - 2.0)) * (self.sigma ** 2)
    if dof > 1.0:
        # Heavy tails: the second moment diverges.
        return float("inf")
    # Second moment does not exist at all.
    return float("nan")
|
||||
|
||||
def link_function(self, y, f):
|
||||
"""link_function $\ln p(y|f)$
|
||||
$$\ln p(y_{i}|f_{i}) = \ln \Gamma(\frac{v+1}{2}) - \ln (\Gamma(\frac{v}{2})\sqrt{v \pi}\sigma) - \frac{v+1}{2}\ln (1 + \frac{1}{v}\left(\frac{y_{i} - f_{i}}{\sigma}\right)^2)$$
|
||||
|
|
@ -79,14 +83,32 @@ class student_t(likelihood_function):
|
|||
|
||||
def predictive_values(self, mu, var):
    """
    Compute the mean and an approximate 95% interval (percentiles 2.5 and
    97.5) of the prediction.

    The predictive density of y* is the integral over f* of p(y*|f*)p(f*),
    i.e. a Student-t likelihood multiplied by a Gaussian over the latent value:

        (((g((v+1)/2))/(g(v/2)*s*sqrt(v*pi)))*(1+(1/v)*((y-f)/s)^2)^(-(v+1)/2))
        *((1/(s*sqrt(2*pi)))*exp(-(1/(2*(s^2)))*((y-f)^2)))

    Only the variance of that density is obtained analytically here; the
    interval itself is a Gaussian approximation.

    :param mu: mean of the latent function f* at the test points
    :param var: variance of the latent function f* at the test points
    :returns: tuple ``(mu, np.nan*mu, p_025, p_975)``; the second entry is a
              NaN placeholder with the same shape as ``mu``
    """
    # We want the variance around test points y*, which comes from
    # int p(y*|f*)p(f*) df*.  By the law of total variance:
    #   Var(y*) = Var(E[y*|f*]) + E[Var(y*|f*)]
    # E[y*|f*] is f* itself, so the first term is the latent variance (var).
    # The second term is the variance of the Student-t distribution, which
    # does not depend on f, only on sigma and the degrees of freedom.
    true_var = var + self.variance

    # We now have the variance of p(y*|f*)p(f*) at the test points, but we
    # still need the 2.5 and 97.5 percentiles.
    # FIXME: Hack, just pretend p(y*|f*)p(f*) is a gaussian and use the
    # gaussian's percentiles (roughly two standard deviations either side).
    # The offset must be a standard deviation, not a variance.
    true_sd = np.sqrt(true_var)
    p_025 = mu - 2. * true_sd
    p_975 = mu + 2. * true_sd

    return mu, np.nan*mu, p_025, p_975
|
||||
|
||||
def sample_predicted_values(self, mu, var):
|
||||
""" Experimental sampling approaches and numerical integration """
|
||||
#p_025 = stats.t.ppf(.025, mu)
|
||||
#p_975 = stats.t.ppf(.975, mu)
|
||||
|
||||
|
|
@ -134,14 +156,13 @@ class student_t(likelihood_function):
|
|||
def t_gauss_int(mu, var):
|
||||
print "Mu: ", mu
|
||||
print "var: ", var
|
||||
result = integrate.quad(t_gaussian, -np.inf, 0.975, args=(mu, var))
|
||||
result = integrate.quad(t_gaussian, 0.025, 0.975, args=(mu, var))
|
||||
print "Result: ", result
|
||||
return result[0]
|
||||
|
||||
vec_t_gauss_int = np.vectorize(t_gauss_int)
|
||||
|
||||
p_025 = vec_t_gauss_int(mu, var)
|
||||
p_975 = vec_t_gauss_int(mu, var)
|
||||
p = vec_t_gauss_int(mu, var)
|
||||
p_025 = mu - p
|
||||
p_975 = mu + p
|
||||
import ipdb; ipdb.set_trace() ### XXX BREAKPOINT
|
||||
|
||||
return mu, np.nan*mu, p_025, p_975
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue