Mirror of https://github.com/SheffieldML/GPy.git
Seemed to be working, now it's not
parent 474d5484b0
commit 7b0d0550cb
2 changed files with 92 additions and 63 deletions
@@ -11,15 +11,22 @@ def student_t_approx():
    Example of regressing with a Student-t likelihood
    """
    #Start a function, any function
    X = np.sort(np.random.uniform(0, 15, 100))[:, None]
    Y = np.sin(X)
    X = np.linspace(0.0, 10.0, 100)[:, None]
    Y = np.sin(X) + np.random.randn(*X.shape)*0.1
    Yc = Y.copy()

    Y = Y/Y.max()

    Yc[10] += 5
    Yc[15] += 20
    Yc = Yc/Yc.max()

    #Add Student-t random noise to datapoints
    deg_free = 100000.5
    real_var = 4
    t_rv = t(deg_free, loc=0, scale=real_var)
    noise = t_rv.rvs(size=Y.shape)
    Y += noise
    deg_free = 1000000 #100000.5
    real_var = 0.1
    #t_rv = t(deg_free, loc=0, scale=real_var)
    #noise = t_rv.rvs(size=Y.shape)
    #Y += noise

    #Add some extreme value noise to some of the datapoints
    #percent_corrupted = 0.15
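For readers following along, the data-generation step above is self-contained enough to run on its own. A minimal sketch, assuming only numpy and scipy, with a seed added for reproducibility; the indices and magnitudes are taken from the diff:

    import numpy as np
    from scipy.stats import t

    np.random.seed(0)                                # added for reproducibility
    X = np.linspace(0.0, 10.0, 100)[:, None]         # 100 inputs on [0, 10]
    Y = np.sin(X) + np.random.randn(*X.shape) * 0.1  # sinusoid plus Gaussian noise
    Y = Y / Y.max()                                  # rescale to unit maximum

    Yc = Y.copy()                                    # corrupted copy of the targets
    Yc[10] += 5                                      # two extreme outliers
    Yc[15] += 20
    Yc = Yc / Yc.max()

    # Student-t noise: with deg_free ~ 1e6 the t distribution is effectively
    # Gaussian; a small deg_free would give the heavy tails the demo is about.
    t_rv = t(1000000, loc=0, scale=0.1)
    t_noise = t_rv.rvs(size=Y.shape)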
@@ -30,64 +37,83 @@ def student_t_approx():
    #print corrupted_indices
    #noise = t_rv.rvs(size=(len(corrupted_indices), 1))
    #Y[corrupted_indices] += noise

    plt.figure(1)
    # Kernel object
    print X.shape
    kernel = GPy.kern.rbf(X.shape[1])
    kernel1 = GPy.kern.rbf(X.shape[1])
    kernel2 = kernel1.copy()
    kernel3 = kernel1.copy()
    kernel4 = kernel1.copy()

    #A GP should completely break down due to the points as they get a lot of weight
    # create simple GP model
    #m = GPy.models.GP_regression(X, Y, kernel=kernel)

    ## optimize
    #print "Clean Gaussian"
    ##A GP should completely break down due to the points as they get a lot of weight
    ## create simple GP model
    #m = GPy.models.GP_regression(X, Y, kernel=kernel1)
    ### optimize
    #m.ensure_default_constraints()
    ##m.unconstrain('noise')
    ##m.constrain_fixed('noise', 0.1)
    #m.optimize()
    ## plot
    ##m.plot()
    #plt.subplot(221)
    #m.plot()
    #print m

    #with a student t distribution, since it has heavy tails it should work well
    likelihood_function = student_t(deg_free, sigma=real_var)
    lap = Laplace(Y, likelihood_function)
    cov = kernel.K(X)
    lap.fit_full(cov)
    ##Corrupt
    #print "Corrupt Gaussian"
    #m = GPy.models.GP_regression(X, Yc, kernel=kernel2)
    #m.ensure_default_constraints()
    ##m.unconstrain('noise')
    ##m.constrain_fixed('noise', 0.1)
    #m.optimize()
    #plt.subplot(222)
    #m.plot()
    #print m

    test_range = np.arange(0, 10, 0.1)
    plt.plot(test_range, t_rv.pdf(test_range))
    for i in xrange(X.shape[0]):
        mode = lap.f_hat[i]
        covariance = lap.hess_hat_i[i,i]
        scaling = np.exp(lap.ln_z_hat)
        normalised_approx = norm(loc=mode, scale=covariance)
        print "Normal with mode %f, and variance %f" % (mode, covariance)
        plt.plot(test_range, scaling*normalised_approx.pdf(test_range))
    plt.show()
    import ipdb; ipdb.set_trace() ### XXX BREAKPOINT
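A detail worth flagging in the plotting loop above: scipy.stats.norm takes scale as a standard deviation, so passing lap.hess_hat_i[i,i] (a variance) directly draws a curve of the wrong width; it needs a square root. A self-contained sketch of the idea the loop visualizes, with the square root in place (all names here are illustrative, none are GPy's):

    import numpy as np
    from scipy.stats import norm, t
    import matplotlib.pyplot as plt

    df, scale = 3.0, 1.0
    t_rv = t(df, loc=0, scale=scale)

    # Laplace approximation to the t density: the mode is 0, and the curvature
    # of -log pdf at the mode, (df + 1)/(df*scale**2), is the inverse variance.
    mode = 0.0
    laplace_var = df * scale**2 / (df + 1)

    # Match heights at the mode rather than areas, as the loop above does.
    gauss = norm(loc=mode, scale=np.sqrt(laplace_var))   # scale = std dev!
    scaling = t_rv.pdf(mode) / gauss.pdf(mode)

    xs = np.linspace(-6, 6, 400)
    plt.plot(xs, t_rv.pdf(xs), label='Student-t pdf')
    plt.plot(xs, scaling * gauss.pdf(xs), '--', label='height-matched Laplace')
    plt.legend()
    plt.show()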
    ##with a student t distribution, since it has heavy tails it should work well
    ##likelihood_function = student_t(deg_free, sigma=real_var)
    ##lap = Laplace(Y, likelihood_function)
    ##cov = kernel.K(X)
    ##lap.fit_full(cov)

    ##test_range = np.arange(0, 10, 0.1)
    ##plt.plot(test_range, t_rv.pdf(test_range))
    ##for i in xrange(X.shape[0]):
        ##mode = lap.f_hat[i]
        ##covariance = lap.hess_hat_i[i,i]
        ##scaling = np.exp(lap.ln_z_hat)
        ##normalised_approx = norm(loc=mode, scale=covariance)
        ##print "Normal with mode %f, and variance %f" % (mode, covariance)
        ##plt.plot(test_range, scaling*normalised_approx.pdf(test_range))
    ##plt.show()

    # Likelihood object
    t_distribution = student_t(deg_free, sigma=real_var)
    t_distribution = student_t(deg_free, sigma=np.sqrt(real_var))
    stu_t_likelihood = Laplace(Y, t_distribution)
    kernel = GPy.kern.rbf(X.shape[1]) + GPy.kern.bias(X.shape[1])

    m = GPy.models.GP(X, stu_t_likelihood, kernel)
    print "Clean student t"
    m = GPy.models.GP(X, stu_t_likelihood, kernel3)
    m.ensure_default_constraints()

    m.update_likelihood_approximation()
    print "NEW MODEL"
    print(m)

    # optimize
    #m.optimize()
    #print(m)

    # plot
    m.plot()
    import ipdb; ipdb.set_trace() ### XXX BREAKPOINT

    m.optimize()
    print(m)
    # plot
    plt.subplot(211)
    m.plot_f()

    print "Corrupt student t"
    t_distribution = student_t(deg_free, sigma=np.sqrt(real_var))
    corrupt_stu_t_likelihood = Laplace(Yc, t_distribution)
    m = GPy.models.GP(X, corrupt_stu_t_likelihood, kernel4)
    m.ensure_default_constraints()
    m.update_likelihood_approximation()
    m.optimize()
    print(m)
    plt.subplot(212)
    m.plot_f()

    import ipdb; ipdb.set_trace() ### XXX BREAKPOINT

    return m
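For anyone landing on this historical commit: in later released versions of GPy the same clean-versus-corrupt experiment is written against a public API, roughly as below. The names reflect GPy 1.x (RBF, Bias, StudentT, the Laplace inference object) and are an assumption against this old diff, not part of the commit:

    import numpy as np
    import GPy

    X = np.linspace(0.0, 10.0, 100)[:, None]
    Y = np.sin(X) + np.random.randn(*X.shape) * 0.1
    Yc = Y.copy()
    Yc[10] += 5    # inject the same two outliers as the demo
    Yc[15] += 20

    kern = GPy.kern.RBF(1) + GPy.kern.Bias(1)
    t_lik = GPy.likelihoods.StudentT(deg_free=5, sigma2=2)
    laplace = GPy.inference.latent_function_inference.Laplace()

    m = GPy.core.GP(X, Yc, kernel=kern, likelihood=t_lik,
                    inference_method=laplace)
    m.optimize()
    print(m)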
@@ -33,13 +33,15 @@ class Laplace(likelihood):

        #Initial values
        self.N, self.D = self.data.shape
        self.is_heteroscedastic = True
        self.Nparams = 0

        self.NORMAL_CONST = -((0.5 * self.N) * np.log(2 * np.pi))

        #Initial values for the GP variables
        self.Y = np.zeros((self.N,1))
        self.Y = np.zeros((self.N, 1))
        self.covariance_matrix = np.eye(self.N)
        self.precision = np.ones(self.N)[:,None]
        self.precision = np.ones(self.N)[:, None]
        self.Z = 0
        self.YYT = None
@@ -58,6 +60,7 @@ class Laplace(likelihood):
        pass # TODO: Laplace likelihood might want to take some parameters...

    def _gradients(self, partial):
        #return np.zeros(0) # TODO: Laplace likelihood might want to take some parameters...
        return np.zeros(0) # TODO: Laplace likelihood might want to take some parameters...
        raise NotImplementedError
@@ -88,10 +91,8 @@ class Laplace(likelihood):
        self.Sigma_tilde_i = self.W #self.hess_hat_i
        #Check it isn't singular!
        epsilon = 1e-2
        """
        if np.abs(det(self.Sigma_tilde_i)) < epsilon:
            raise ValueError("inverse covariance must be non-singular to invert!")
        """
        #Do we really need to invert Sigma_tilde_i? :(
        if self.likelihood_function.log_concave:
            (self.Sigma_tilde, _, _, _) = pdinv(self.Sigma_tilde_i)
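Why the pdinv branch is gated on log_concave: log-concavity of p(y|f) means -d²log p/df² ≥ 0 everywhere, so W (and hence Sigma_tilde_i) is positive semidefinite and a Cholesky-based inverse like pdinv applies; a non-log-concave likelihood such as the Student-t gives no such guarantee. A toy check with a Gaussian likelihood (illustrative only, not GPy code):

    import numpy as np

    sigma2 = 0.5
    # For a Gaussian likelihood, -d^2 log p(y|f)/df^2 = 1/sigma2 > 0 at every
    # datapoint, so the diagonal W matrix is positive definite and Cholesky succeeds.
    W = np.diag(np.full(4, 1.0 / sigma2))
    L = np.linalg.cholesky(W)
    print(np.allclose(L @ L.T, W))   # True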
@@ -99,21 +100,17 @@ class Laplace(likelihood):
            self.Sigma_tilde = inv(self.Sigma_tilde_i)
        #f_hat? should be f but we must have optimized for them I guess?
        Y_tilde = mdot(self.Sigma_tilde, self.hess_hat, self.f_hat)
        #Z_tilde = (self.ln_z_hat - self.NORMAL_CONST
        #           - 0.5*mdot(self.f_hat, self.hess_hat, self.f_hat)
        #           + 0.5*mdot(Y_tilde.T, (self.Sigma_tilde_i, Y_tilde))
        #           )
        Z_tilde = (self.ln_z_hat - self.NORMAL_CONST
                   + 0.5*self.log_hess_hat_det
                   + 0.5*mdot(self.f_hat, self.Ki, self.f_hat)
                   + 0.5*mdot(Y_tilde.T, (self.Sigma_tilde_i, Y_tilde))
                   + 0.5*mdot(self.f_hat, self.hess_hat, self.f_hat)
                   + 0.5*mdot(Y_tilde.T, (self.Sigma_tilde_i, Y_tilde))
                   - mdot(Y_tilde.T, (self.Sigma_tilde_i, self.f_hat))
                   )

        self.Z = Z_tilde
        self.Y = Y_tilde
        self.Y = Y_tilde[:, None]
        self.YYT = np.dot(self.Y, self.Y.T)
        self.covariance_matrix = self.Sigma_tilde
        self.precision = 1 / np.diag(self.Sigma_tilde)[:, None]
        self.YYT = np.dot(self.Y, self.Y.T)
        import ipdb; ipdb.set_trace() ### XXX BREAKPOINT

    def fit_full(self, K):
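Read off the diff as a sketch (not a verified statement of the commit's intent): _compute_GP_variables re-expresses the Laplace posterior as a conjugate Gaussian model with pseudo-targets Y_tilde and noise covariance Sigma_tilde, chosen so that the Gaussian posterior mean recovers the mode f_hat. In LaTeX, with W the negative log-likelihood Hessian at the mode:

    \tilde{\Sigma} = W^{-1}, \qquad
    H = K^{-1} + W, \qquad
    \tilde{Y} = \tilde{\Sigma} H \hat{f},

    (K^{-1} + \tilde{\Sigma}^{-1})^{-1} \tilde{\Sigma}^{-1} \tilde{Y}
        = H^{-1} W \, W^{-1} H \hat{f} = \hat{f}.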
@@ -122,6 +119,7 @@ class Laplace(likelihood):
        For nomenclature see Rasmussen & Williams 2006
        :K: Covariance matrix
        """
        self.K = K.copy()
        f = np.zeros((self.N, 1))
        (self.Ki, _, _, self.log_Kdet) = pdinv(K)
        LOG_K_CONST = -(0.5 * self.log_Kdet)
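For context on what fit_full is initializing here: the mode f_hat is conventionally found by Newton iterations on the unnormalized log posterior (Rasmussen & Williams 2006, Algorithm 3.1). A generic, simplified sketch of that iteration, not the code in this commit; link_grads is a hypothetical callback returning the first and (diagonal) second derivatives of log p(y|f):

    import numpy as np

    def find_mode(K, link_grads, max_iters=100, tol=1e-6):
        """Newton iteration for the mode of log p(y|f) - 0.5 f' K^{-1} f."""
        N = K.shape[0]
        f = np.zeros(N)
        Ki = np.linalg.inv(K)   # fine for a sketch; prefer pdinv/Cholesky in practice
        for _ in range(max_iters):
            g, h = link_grads(f)          # d log p(y|f) and its diagonal Hessian
            W = -h                        # W = -hessian of the log likelihood
            H = Ki + np.diag(W)
            f_new = np.linalg.solve(H, g + W * f)   # R&W 2006, eq. (3.18)
            if np.max(np.abs(f_new - f)) < tol:
                return f_new
            f = f_new
        return f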
@@ -148,6 +146,11 @@ class Laplace(likelihood):

        #At this point get the Hessian matrix
        self.W = -np.diag(self.likelihood_function.link_hess(self.data[:, 0], self.f_hat))
        if not self.likelihood_function.log_concave:
            self.W[self.W < 0] = 1e-6 #FIXME-HACK: This is a hack since GPy can't handle negative variances, which can occur
            #if the likelihood is non-log-concave. We want to say that there is a negative variance,
            #to make the posterior less certain than the prior and likelihood;
            #this is a property held only by non-log-concave likelihoods.
        self.hess_hat = self.Ki + self.W
        (self.hess_hat_i, _, _, self.log_hess_hat_det) = pdinv(self.hess_hat)
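The FIXME above is exactly the Student-t case: for a t likelihood the second derivative of log p(y|f) changes sign, negative near the mode but positive in the tails, so W picks up negative entries there. A self-contained check of that sign change (standard parameterization; none of this is GPy code):

    import numpy as np

    def d2_log_t(r, df=3.0, s2=1.0):
        # d^2/df^2 of log p(y|f) for a t likelihood with residual r = y - f:
        # log p = -(df+1)/2 * log(1 + r^2/(df*s2)) + const
        return (df + 1) * (r**2 - df * s2) / (df * s2 + r**2)**2

    for r in [0.0, 1.0, 3.0]:
        print(r, d2_log_t(r))   # negative for small |r|, positive once r^2 > df*s2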
@@ -166,10 +169,10 @@ class Laplace(likelihood):
        #the area of p(f)p(y|f); we do this by matching the heights of the distributions at the mode
        #z_hat = -0.5*ln|H| - 0.5*ln|K| - 0.5*f_hat*K^{-1}*f_hat + \sum_{n} ln p(y_n|f_n)
        #Unsure whether it's log_hess or log_hess_i
        self.ln_z_hat = (-0.5*self.log_hess_hat_det
                         - 0.5*self.log_Kdet
                         -1*self.likelihood_function.link_function(self.data[:,0], self.f_hat)
                         - mdot(self.f_hat.T, (self.Ki, self.f_hat))
        self.ln_z_hat = (-0.5*self.log_hess_hat_det
                         + 0.5*self.log_Kdet
                         + self.likelihood_function.link_function(self.data[:,0], self.f_hat)
                         - 0.5*mdot(self.f_hat.T, (self.Ki, self.f_hat))
                         )

        return self._compute_GP_variables()
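For reference, the textbook Laplace approximation to the log marginal likelihood (Rasmussen & Williams 2006, eq. 3.32) is the quantity this height-matching constant is most easily confused with; they are related but not identical:

    \log q(\mathbf{y} \mid X) =
        -\tfrac{1}{2}\,\hat{\mathbf{f}}^{\top} K^{-1} \hat{\mathbf{f}}
        + \log p(\mathbf{y} \mid \hat{\mathbf{f}})
        - \tfrac{1}{2}\,\log\bigl|\, I + W^{1/2} K\, W^{1/2} \bigr| .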