mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-04-27 13:56:23 +02:00
Working laplace, just needs predictive values
This commit is contained in:
parent
7b0d0550cb
commit
15d5c2f22d
3 changed files with 121 additions and 46 deletions
|
|
@ -10,20 +10,23 @@ def student_t_approx():
|
|||
"""
|
||||
Example of regressing with a student t likelihood
|
||||
"""
|
||||
real_var = 0.1
|
||||
#Start a function, any function
|
||||
X = np.linspace(0.0, 10.0, 100)[:, None]
|
||||
Y = np.sin(X) + np.random.randn(*X.shape)*0.1
|
||||
X = np.linspace(0.0, 10.0, 30)[:, None]
|
||||
Y = np.sin(X) + np.random.randn(*X.shape)*real_var
|
||||
Yc = Y.copy()
|
||||
|
||||
Y = Y/Y.max()
|
||||
#Y = Y/Y.max()
|
||||
|
||||
Yc[10] += 5
|
||||
Yc[15] += 20
|
||||
Yc = Yc/Yc.max()
|
||||
#Yc[10] += 100
|
||||
Yc[25] += 10
|
||||
Yc[23] += 10
|
||||
Yc[24] += 10
|
||||
#Yc = Yc/Yc.max()
|
||||
|
||||
#Add student t random noise to datapoints
|
||||
deg_free = 1000000 #100000.5
|
||||
real_var = 0.1
|
||||
deg_free = 20 #100000.5
|
||||
real_sd = np.sqrt(real_var)
|
||||
#t_rv = t(deg_free, loc=0, scale=real_var)
|
||||
#noise = t_rv.rvs(size=Y.shape)
|
||||
#Y += noise
|
||||
|
|
@ -38,36 +41,37 @@ def student_t_approx():
|
|||
#noise = t_rv.rvs(size=(len(corrupted_indices), 1))
|
||||
#Y[corrupted_indices] += noise
|
||||
plt.figure(1)
|
||||
plt.suptitle('Gaussian likelihood')
|
||||
# Kernel object
|
||||
kernel1 = GPy.kern.rbf(X.shape[1])
|
||||
kernel2 = kernel1.copy()
|
||||
kernel3 = kernel1.copy()
|
||||
kernel4 = kernel1.copy()
|
||||
|
||||
#print "Clean Gaussian"
|
||||
##A GP should completely break down due to the points as they get a lot of weight
|
||||
## create simple GP model
|
||||
#m = GPy.models.GP_regression(X, Y, kernel=kernel1)
|
||||
### optimize
|
||||
#m.ensure_default_constraints()
|
||||
##m.unconstrain('noise')
|
||||
##m.constrain_fixed('noise', 0.1)
|
||||
#m.optimize()
|
||||
## plot
|
||||
#plt.subplot(221)
|
||||
#m.plot()
|
||||
#print m
|
||||
print "Clean Gaussian"
|
||||
#A GP should completely break down due to the points as they get a lot of weight
|
||||
# create simple GP model
|
||||
m = GPy.models.GP_regression(X, Y, kernel=kernel1)
|
||||
## optimize
|
||||
m.ensure_default_constraints()
|
||||
#m.unconstrain('noise')
|
||||
#m.constrain_fixed('noise', 0.1)
|
||||
m.optimize()
|
||||
# plot
|
||||
plt.subplot(211)
|
||||
m.plot()
|
||||
print m
|
||||
|
||||
##Corrupt
|
||||
#print "Corrupt Gaussian"
|
||||
#m = GPy.models.GP_regression(X, Yc, kernel=kernel2)
|
||||
#m.ensure_default_constraints()
|
||||
##m.unconstrain('noise')
|
||||
##m.constrain_fixed('noise', 0.1)
|
||||
#m.optimize()
|
||||
#plt.subplot(222)
|
||||
#m.plot()
|
||||
#print m
|
||||
print "Corrupt Gaussian"
|
||||
m = GPy.models.GP_regression(X, Yc, kernel=kernel2)
|
||||
m.ensure_default_constraints()
|
||||
#m.unconstrain('noise')
|
||||
#m.constrain_fixed('noise', 0.1)
|
||||
m.optimize()
|
||||
plt.subplot(212)
|
||||
m.plot()
|
||||
print m
|
||||
|
||||
##with a student t distribution, since it has heavy tails it should work well
|
||||
##likelihood_function = student_t(deg_free, sigma=real_var)
|
||||
|
|
@ -86,9 +90,13 @@ def student_t_approx():
|
|||
##plt.plot(test_range, scaling*normalised_approx.pdf(test_range))
|
||||
##plt.show()
|
||||
|
||||
plt.figure(2)
|
||||
plt.suptitle('Student-t likelihood')
|
||||
edited_real_sd = real_sd
|
||||
|
||||
# Likelihood object
|
||||
t_distribution = student_t(deg_free, sigma=np.sqrt(real_var))
|
||||
stu_t_likelihood = Laplace(Y, t_distribution)
|
||||
t_distribution = student_t(deg_free, sigma=edited_real_sd)
|
||||
stu_t_likelihood = Laplace(Yc, t_distribution)
|
||||
|
||||
print "Clean student t"
|
||||
m = GPy.models.GP(X, stu_t_likelihood, kernel3)
|
||||
|
|
@ -100,9 +108,11 @@ def student_t_approx():
|
|||
# plot
|
||||
plt.subplot(211)
|
||||
m.plot_f()
|
||||
plt.ylim(-2.5,2.5)
|
||||
#import ipdb; ipdb.set_trace() ### XXX BREAKPOINT
|
||||
|
||||
print "Corrupt student t"
|
||||
t_distribution = student_t(deg_free, sigma=np.sqrt(real_var))
|
||||
t_distribution = student_t(deg_free, sigma=edited_real_sd)
|
||||
corrupt_stu_t_likelihood = Laplace(Yc, t_distribution)
|
||||
m = GPy.models.GP(X, corrupt_stu_t_likelihood, kernel4)
|
||||
m.ensure_default_constraints()
|
||||
|
|
@ -110,8 +120,8 @@ def student_t_approx():
|
|||
m.optimize()
|
||||
print(m)
|
||||
plt.subplot(212)
|
||||
m.plot_f()
|
||||
|
||||
m.plot()
|
||||
plt.ylim(-2.5,2.5)
|
||||
import ipdb; ipdb.set_trace() ### XXX BREAKPOINT
|
||||
|
||||
return m
|
||||
|
|
|
|||
|
|
@ -88,11 +88,12 @@ class Laplace(likelihood):
|
|||
and $$\ln \tilde{z} = \ln z + \frac{N}{2}\ln 2\pi + \frac{1}{2}\tilde{Y}\tilde{\Sigma}^{-1}\tilde{Y}$$
|
||||
|
||||
"""
|
||||
self.Sigma_tilde_i = self.W #self.hess_hat_i
|
||||
self.Sigma_tilde_i = self.W
|
||||
#Check it isn't singular!
|
||||
epsilon = 1e-2
|
||||
epsilon = 1e-6
|
||||
if np.abs(det(self.Sigma_tilde_i)) < epsilon:
|
||||
raise ValueError("inverse covariance must be non-singular to inverse!")
|
||||
print "WARNING: Transformed covariance matrix is signular!"
|
||||
#raise ValueError("inverse covariance must be non-singular to invert!")
|
||||
#Do we really need to invert Sigma_tilde_i? :(
|
||||
if self.likelihood_function.log_concave:
|
||||
(self.Sigma_tilde, _, _, _) = pdinv(self.Sigma_tilde_i)
|
||||
|
|
@ -110,8 +111,12 @@ class Laplace(likelihood):
|
|||
self.Y = Y_tilde[:, None]
|
||||
self.YYT = np.dot(self.Y, self.Y.T)
|
||||
self.covariance_matrix = self.Sigma_tilde
|
||||
self.precision = 1 / np.diag(self.Sigma_tilde)[:, None]
|
||||
import ipdb; ipdb.set_trace() ### XXX BREAKPOINT
|
||||
#if not self.likelihood_function.log_concave:
|
||||
#self.covariance_matrix[self.covariance_matrix < 0] = 1e+6 #FIXME-HACK: This is a hack since GPy can't handle negative variances which can occur
|
||||
##If the likelihood is non-log-concave, we want to say that there is a negative variance
|
||||
##To cause the posterior to become less certain than the prior and likelihood,
|
||||
##This is a property only held by non-log-concave likelihoods
|
||||
self.precision = 1 / np.diag(self.covariance_matrix)[:, None]
|
||||
|
||||
def fit_full(self, K):
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
from scipy.special import gammaln
|
||||
from scipy.special import gammaln, gamma
|
||||
from scipy import integrate
|
||||
import numpy as np
|
||||
from GPy.likelihoods.likelihood_functions import likelihood_function
|
||||
from scipy import stats
|
||||
|
|
@ -79,9 +80,68 @@ class student_t(likelihood_function):
|
|||
def predictive_values(self, mu, var):
|
||||
"""
|
||||
Compute mean, and confidence interval (percentiles 2.5 and 97.5) of the prediction
|
||||
"""
|
||||
mean = np.exp(mu)
|
||||
p_025 = stats.t.ppf(.025, mean)
|
||||
p_975 = stats.t.ppf(.975, mean)
|
||||
|
||||
return mean, np.nan*mean, p_025, p_975
|
||||
Need to find what the variance is at the latent points for a student t*normal
|
||||
(((g((v+1)/2))/(g(v/2)*s*sqrt(v*pi)))*(1+(1/v)*((y-f)/s)^2)^(-(v+1)/2))*((1/(s*sqrt(2*pi)))*exp(-(1/(2*(s^2)))*((y-f)^2)))
|
||||
|
||||
(((g((v+1)/2))/(g(v/2)*s*sqrt(v*pi)))*(1+(1/v)*((y-f)/s)^2)^(-(v+1)/2))
|
||||
*((1/(s*sqrt(2*pi)))*exp(-(1/(2*(s^2)))*((y-f)^2)))
|
||||
"""
|
||||
#p_025 = stats.t.ppf(.025, mu)
|
||||
#p_975 = stats.t.ppf(.975, mu)
|
||||
|
||||
num_test_points = mu.shape[0]
|
||||
#Each mu is the latent point f* at the test point x*,
|
||||
#and the var is the gaussian variance at this point
|
||||
#Take lots of samples from this, so we have lots of possible values
|
||||
#for latent point f* for each test point x* weighted by how likely we were to pick it
|
||||
print "Taking %d samples of f*".format(num_test_points)
|
||||
num_f_samples = 10
|
||||
num_y_samples = 10
|
||||
student_t_means = np.random.normal(loc=mu, scale=np.sqrt(var), size=(num_test_points, num_f_samples))
|
||||
print "Student t means shape: ", student_t_means.shape
|
||||
|
||||
#Now we have lots of f*, lets work out the likelihood of getting this by sampling
|
||||
#from a student t centred on this point, sample many points from this distribution
|
||||
#centred on f*
|
||||
#for test_point, f in enumerate(student_t_means):
|
||||
#print test_point
|
||||
#print f.shape
|
||||
#student_t_samples = stats.t.rvs(self.v, loc=f[:,None],
|
||||
#scale=self.sigma,
|
||||
#size=(num_f_samples, num_y_samples))
|
||||
#print student_t_samples.shape
|
||||
|
||||
student_t_samples = stats.t.rvs(self.v, loc=student_t_means[:,None],
|
||||
scale=self.sigma,
|
||||
size=(num_test_points, num_y_samples, num_f_samples))
|
||||
student_t_samples = np.reshape(student_t_samples,
|
||||
(num_test_points, num_y_samples*num_f_samples))
|
||||
|
||||
#Now take the 97.5 and 2.5 percentiles of these points
|
||||
p_025 = stats.scoreatpercentile(student_t_samples, .025, axis=1)[:, None]
|
||||
p_975 = stats.scoreatpercentile(student_t_samples, .975, axis=1)[:, None]
|
||||
|
||||
p_025 = 1+p_025
|
||||
p_975 = 1+p_975
|
||||
|
||||
##Alternatively we could sample from int p(y|f*)p(f*|x*) df*
|
||||
def t_gaussian(f, mu, var):
|
||||
return (((gamma((self.v+1)*0.5)) / (gamma(self.v*0.5)*self.sigma*np.sqrt(self.v*np.pi))) * ((1+(1/self.v)*(((mu-f)/self.sigma)**2))**(-(self.v+1)*0.5))
|
||||
* ((1/(np.sqrt(2*np.pi*var)))*np.exp(-(1/(2*var)) *((mu-f)**2)))
|
||||
)
|
||||
|
||||
def t_gauss_int(mu, var):
|
||||
print "Mu: ", mu
|
||||
print "var: ", var
|
||||
result = integrate.quad(t_gaussian, -np.inf, 0.975, args=(mu, var))
|
||||
print "Result: ", result
|
||||
return result[0]
|
||||
|
||||
vec_t_gauss_int = np.vectorize(t_gauss_int)
|
||||
|
||||
p_025 = vec_t_gauss_int(mu, var)
|
||||
p_975 = vec_t_gauss_int(mu, var)
|
||||
import ipdb; ipdb.set_trace() ### XXX BREAKPOINT
|
||||
|
||||
return mu, np.nan*mu, p_025, p_975
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue