mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-06-08 15:05:15 +02:00
Changed some parameters of the laplace, tidied up examples
This commit is contained in:
parent
50e9034a6d
commit
0a43329150
2 changed files with 105 additions and 96 deletions
|
|
@ -2,22 +2,21 @@ import GPy
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import matplotlib.pyplot as plt
|
import matplotlib.pyplot as plt
|
||||||
from GPy.util import datasets
|
from GPy.util import datasets
|
||||||
#np.random.seed(1)
|
|
||||||
|
|
||||||
def student_t_approx():
|
def student_t_approx(optimize=True, plot=True):
|
||||||
"""
|
"""
|
||||||
Example of regressing with a student t likelihood
|
Example of regressing with a student t likelihood using Laplace
|
||||||
"""
|
"""
|
||||||
real_std = 0.1
|
real_std = 0.1
|
||||||
#Start a function, any function
|
#Start a function, any function
|
||||||
X = np.linspace(0.0, np.pi*2, 100)[:, None]
|
X = np.linspace(0.0, np.pi*2, 100)[:, None]
|
||||||
Y = np.sin(X) + np.random.randn(*X.shape)*real_std
|
Y = np.sin(X) + np.random.randn(*X.shape)*real_std
|
||||||
|
Y = Y/Y.max()
|
||||||
Yc = Y.copy()
|
Yc = Y.copy()
|
||||||
|
|
||||||
X_full = np.linspace(0.0, np.pi*2, 500)[:, None]
|
X_full = np.linspace(0.0, np.pi*2, 500)[:, None]
|
||||||
Y_full = np.sin(X_full)
|
Y_full = np.sin(X_full)
|
||||||
|
Y_full = Y_full/Y_full.max()
|
||||||
Y = Y/Y.max()
|
|
||||||
|
|
||||||
#Slightly noisy data
|
#Slightly noisy data
|
||||||
Yc[75:80] += 1
|
Yc[75:80] += 1
|
||||||
|
|
@ -34,94 +33,93 @@ def student_t_approx():
|
||||||
deg_free = 5
|
deg_free = 5
|
||||||
print "Real noise: ", real_std
|
print "Real noise: ", real_std
|
||||||
initial_var_guess = 0.5
|
initial_var_guess = 0.5
|
||||||
|
edited_real_sd = initial_var_guess
|
||||||
|
|
||||||
#t_rv = t(deg_free, loc=0, scale=real_var)
|
|
||||||
#noise = t_rvrvs(size=Y.shape)
|
|
||||||
#Y += noise
|
|
||||||
|
|
||||||
plt.figure(1)
|
|
||||||
plt.suptitle('Gaussian likelihood')
|
|
||||||
# Kernel object
|
# Kernel object
|
||||||
kernel1 = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1])
|
kernel1 = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1])
|
||||||
kernel2 = kernel1.copy()
|
kernel2 = kernel1.copy()
|
||||||
kernel3 = kernel1.copy()
|
kernel3 = kernel1.copy()
|
||||||
kernel4 = kernel1.copy()
|
kernel4 = kernel1.copy()
|
||||||
kernel5 = kernel1.copy()
|
|
||||||
kernel6 = kernel1.copy()
|
|
||||||
|
|
||||||
print "Clean Gaussian"
|
#Gaussian GP model on clean data
|
||||||
#A GP should completely break down due to the points as they get a lot of weight
|
m1 = GPy.models.GPRegression(X, Y.copy(), kernel=kernel1)
|
||||||
# create simple GP model
|
|
||||||
m = GPy.models.GPRegression(X, Y, kernel=kernel1)
|
|
||||||
# optimize
|
# optimize
|
||||||
m.ensure_default_constraints()
|
m1.ensure_default_constraints()
|
||||||
m.constrain_fixed('white', 1e-4)
|
m1.constrain_fixed('white', 1e-5)
|
||||||
m.randomize()
|
m1.randomize()
|
||||||
m.optimize()
|
|
||||||
# plot
|
|
||||||
ax = plt.subplot(211)
|
|
||||||
m.plot(ax=ax)
|
|
||||||
plt.plot(X_full, Y_full)
|
|
||||||
plt.ylim(-1.5, 1.5)
|
|
||||||
plt.title('Gaussian clean')
|
|
||||||
print m
|
|
||||||
|
|
||||||
#Corrupt
|
#Gaussian GP model on corrupt data
|
||||||
print "Corrupt Gaussian"
|
m2 = GPy.models.GPRegression(X, Yc.copy(), kernel=kernel2)
|
||||||
m = GPy.models.GPRegression(X, Yc, kernel=kernel2)
|
m2.ensure_default_constraints()
|
||||||
m.ensure_default_constraints()
|
m2.constrain_fixed('white', 1e-5)
|
||||||
m.constrain_fixed('white', 1e-4)
|
m2.randomize()
|
||||||
m.randomize()
|
|
||||||
m.optimize()
|
|
||||||
ax = plt.subplot(212)
|
|
||||||
m.plot(ax=ax)
|
|
||||||
plt.plot(X_full, Y_full)
|
|
||||||
plt.ylim(-1.5, 1.5)
|
|
||||||
plt.title('Gaussian corrupt')
|
|
||||||
print m
|
|
||||||
|
|
||||||
plt.figure(2)
|
#Student t GP model on clean data
|
||||||
plt.suptitle('Student-t likelihood')
|
|
||||||
edited_real_sd = initial_var_guess
|
|
||||||
|
|
||||||
print "Clean student t, rasm"
|
|
||||||
t_distribution = GPy.likelihoods.noise_model_constructors.student_t(deg_free=deg_free, sigma2=edited_real_sd)
|
t_distribution = GPy.likelihoods.noise_model_constructors.student_t(deg_free=deg_free, sigma2=edited_real_sd)
|
||||||
stu_t_likelihood = GPy.likelihoods.Laplace(Y.copy(), t_distribution)
|
stu_t_likelihood = GPy.likelihoods.Laplace(Y.copy(), t_distribution)
|
||||||
m = GPy.models.GPRegression(X, Y.copy(), kernel6, likelihood=stu_t_likelihood)
|
m3 = GPy.models.GPRegression(X, Y.copy(), kernel3, likelihood=stu_t_likelihood)
|
||||||
m.ensure_default_constraints()
|
m3.ensure_default_constraints()
|
||||||
m.constrain_positive('t_noise')
|
m3.constrain_bounded('t_noise', 1e-6, 10.)
|
||||||
m.constrain_fixed('white', 1e-4)
|
m3.constrain_fixed('white', 1e-5)
|
||||||
m.randomize()
|
m3.randomize()
|
||||||
#m.update_likelihood_approximation()
|
|
||||||
m.optimize()
|
|
||||||
print(m)
|
|
||||||
ax = plt.subplot(211)
|
|
||||||
m.plot(ax=ax)
|
|
||||||
plt.plot(X_full, Y_full)
|
|
||||||
plt.ylim(-1.5, 1.5)
|
|
||||||
plt.title('Student-t rasm clean')
|
|
||||||
|
|
||||||
print "Corrupt student t, rasm"
|
#Student t GP model on corrupt data
|
||||||
t_distribution = GPy.likelihoods.noise_model_constructors.student_t(deg_free=deg_free, sigma2=edited_real_sd)
|
t_distribution = GPy.likelihoods.noise_model_constructors.student_t(deg_free=deg_free, sigma2=edited_real_sd)
|
||||||
corrupt_stu_t_likelihood = GPy.likelihoods.Laplace(Yc.copy(), t_distribution)
|
corrupt_stu_t_likelihood = GPy.likelihoods.Laplace(Yc.copy(), t_distribution)
|
||||||
m = GPy.models.GPRegression(X, Yc.copy(), kernel4, likelihood=corrupt_stu_t_likelihood)
|
m4 = GPy.models.GPRegression(X, Yc.copy(), kernel4, likelihood=corrupt_stu_t_likelihood)
|
||||||
m.ensure_default_constraints()
|
m4.ensure_default_constraints()
|
||||||
m.constrain_bounded('t_noise', 1e-6, 10.)
|
m4.constrain_bounded('t_noise', 1e-6, 10.)
|
||||||
m.constrain_fixed('white', 1e-4)
|
m4.constrain_fixed('white', 1e-5)
|
||||||
m.randomize()
|
m4.randomize()
|
||||||
for a in range(1):
|
|
||||||
m.randomize()
|
|
||||||
m_start = m.copy()
|
|
||||||
print m
|
|
||||||
m.optimize('scg', messages=1)
|
|
||||||
print(m)
|
|
||||||
ax = plt.subplot(212)
|
|
||||||
m.plot(ax=ax)
|
|
||||||
plt.plot(X_full, Y_full)
|
|
||||||
plt.ylim(-1.5, 1.5)
|
|
||||||
plt.title('Student-t rasm corrupt')
|
|
||||||
|
|
||||||
return m
|
if optimize:
|
||||||
|
optimizer='scg'
|
||||||
|
print "Clean Gaussian"
|
||||||
|
m1.optimize(optimizer, messages=1)
|
||||||
|
print "Corrupt Gaussian"
|
||||||
|
m2.optimize(optimizer, messages=1)
|
||||||
|
print "Clean student t"
|
||||||
|
m3.optimize(optimizer, messages=1)
|
||||||
|
print "Corrupt student t"
|
||||||
|
m4.optimize(optimizer, messages=1)
|
||||||
|
|
||||||
|
if False:
|
||||||
|
print m1
|
||||||
|
print m3
|
||||||
|
plt.figure(3)
|
||||||
|
plt.scatter(X, m1.likelihood.Y, c='g')
|
||||||
|
plt.scatter(X, m3.likelihood.Y, c='r')
|
||||||
|
|
||||||
|
if plot:
|
||||||
|
plt.figure(1)
|
||||||
|
plt.suptitle('Gaussian likelihood')
|
||||||
|
ax = plt.subplot(211)
|
||||||
|
m1.plot(ax=ax)
|
||||||
|
plt.plot(X_full, Y_full)
|
||||||
|
plt.ylim(-1.5, 1.5)
|
||||||
|
plt.title('Gaussian clean')
|
||||||
|
|
||||||
|
ax = plt.subplot(212)
|
||||||
|
m2.plot(ax=ax)
|
||||||
|
plt.plot(X_full, Y_full)
|
||||||
|
plt.ylim(-1.5, 1.5)
|
||||||
|
plt.title('Gaussian corrupt')
|
||||||
|
|
||||||
|
plt.figure(2)
|
||||||
|
plt.suptitle('Student-t likelihood')
|
||||||
|
ax = plt.subplot(211)
|
||||||
|
m3.plot(ax=ax)
|
||||||
|
plt.plot(X_full, Y_full)
|
||||||
|
plt.ylim(-1.5, 1.5)
|
||||||
|
plt.title('Student-t rasm clean')
|
||||||
|
|
||||||
|
ax = plt.subplot(212)
|
||||||
|
m4.plot(ax=ax)
|
||||||
|
plt.plot(X_full, Y_full)
|
||||||
|
plt.ylim(-1.5, 1.5)
|
||||||
|
plt.title('Student-t rasm corrupt')
|
||||||
|
|
||||||
|
return m1, m2, m3, m4
|
||||||
|
|
||||||
def boston_example():
|
def boston_example():
|
||||||
import sklearn
|
import sklearn
|
||||||
|
|
@ -294,3 +292,4 @@ def precipitation_example():
|
||||||
for n, (train, test) in enumerate(kf):
|
for n, (train, test) in enumerate(kf):
|
||||||
X_train, X_test, Y_train, Y_test = X[train], X[test], Y[train], Y[test]
|
X_train, X_test, Y_train, Y_test = X[train], X[test], Y[train], Y[test]
|
||||||
print "Fold {}".format(n)
|
print "Fold {}".format(n)
|
||||||
|
|
||||||
|
|
@ -15,6 +15,7 @@ import scipy as sp
|
||||||
from likelihood import likelihood
|
from likelihood import likelihood
|
||||||
from ..util.linalg import mdot, jitchol, pddet, dpotrs
|
from ..util.linalg import mdot, jitchol, pddet, dpotrs
|
||||||
from functools import partial as partial_func
|
from functools import partial as partial_func
|
||||||
|
import warnings
|
||||||
|
|
||||||
class Laplace(likelihood):
|
class Laplace(likelihood):
|
||||||
"""Laplace approximation to a posterior"""
|
"""Laplace approximation to a posterior"""
|
||||||
|
|
@ -64,6 +65,7 @@ class Laplace(likelihood):
|
||||||
self.YYT = None
|
self.YYT = None
|
||||||
|
|
||||||
self.old_Ki_f = None
|
self.old_Ki_f = None
|
||||||
|
self.bad_fhat = False
|
||||||
|
|
||||||
def predictive_values(self,mu,var,full_cov,**noise_args):
|
def predictive_values(self,mu,var,full_cov,**noise_args):
|
||||||
if full_cov:
|
if full_cov:
|
||||||
|
|
@ -198,18 +200,16 @@ class Laplace(likelihood):
|
||||||
Y_tilde = Wi*self.Ki_f + self.f_hat
|
Y_tilde = Wi*self.Ki_f + self.f_hat
|
||||||
|
|
||||||
self.Wi_K_i = self.W12BiW12
|
self.Wi_K_i = self.W12BiW12
|
||||||
self.ln_det_Wi_K = pddet(self.Sigma_tilde + self.K)
|
ln_det_Wi_K = pddet(self.Sigma_tilde + self.K)
|
||||||
self.lik = self.noise_model.logpdf(self.f_hat, self.data, extra_data=self.extra_data)
|
lik = self.noise_model.logpdf(self.f_hat, self.data, extra_data=self.extra_data)
|
||||||
self.y_Wi_Ki_i_y = mdot(Y_tilde.T, self.Wi_K_i, Y_tilde)
|
y_Wi_K_i_y = mdot(Y_tilde.T, self.Wi_K_i, Y_tilde)
|
||||||
|
|
||||||
Z_tilde = (+ self.lik
|
Z_tilde = (+ lik
|
||||||
- 0.5*self.ln_B_det
|
- 0.5*self.ln_B_det
|
||||||
+ 0.5*self.ln_det_Wi_K
|
+ 0.5*ln_det_Wi_K
|
||||||
- 0.5*self.f_Ki_f
|
- 0.5*self.f_Ki_f
|
||||||
+ 0.5*self.y_Wi_Ki_i_y
|
+ 0.5*y_Wi_K_i_y
|
||||||
)
|
)
|
||||||
#print "Term, {}, {}, {}, {}, {}".format(self.lik, - 0.5*self.ln_B_det, + 0.5*self.ln_det_Wi_K, - 0.5*self.f_Ki_f, + 0.5*self.y_Wi_Ki_i_y)
|
|
||||||
|
|
||||||
#Convert to float as its (1, 1) and Z must be a scalar
|
#Convert to float as its (1, 1) and Z must be a scalar
|
||||||
self.Z = np.float64(Z_tilde)
|
self.Z = np.float64(Z_tilde)
|
||||||
self.Y = Y_tilde
|
self.Y = Y_tilde
|
||||||
|
|
@ -247,7 +247,10 @@ class Laplace(likelihood):
|
||||||
#At this point get the hessian matrix (or vector as W is diagonal)
|
#At this point get the hessian matrix (or vector as W is diagonal)
|
||||||
self.W = -self.noise_model.d2logpdf_df2(self.f_hat, self.data, extra_data=self.extra_data)
|
self.W = -self.noise_model.d2logpdf_df2(self.f_hat, self.data, extra_data=self.extra_data)
|
||||||
|
|
||||||
#TODO: Could save on computation when using rasm by returning these, means it isn't just a "mode finder" though
|
if not self.noise_model.log_concave:
|
||||||
|
#print "Under 1e-10: {}".format(np.sum(self.W < 1e-6))
|
||||||
|
self.W[self.W < 1e-6] = 1e-6 # FIXME-HACK: This is a hack since GPy can't handle negative variances which can occur
|
||||||
|
|
||||||
self.W12BiW12, self.ln_B_det = self._compute_B_statistics(self.K, self.W, np.eye(self.N))
|
self.W12BiW12, self.ln_B_det = self._compute_B_statistics(self.K, self.W, np.eye(self.N))
|
||||||
|
|
||||||
self.Ki_f = self.Ki_f
|
self.Ki_f = self.Ki_f
|
||||||
|
|
@ -283,11 +286,11 @@ class Laplace(likelihood):
|
||||||
except:
|
except:
|
||||||
import ipdb; ipdb.set_trace()
|
import ipdb; ipdb.set_trace()
|
||||||
|
|
||||||
W12BiW12 = W_12*dpotrs(L, np.asfortranarray(W_12*a), lower=1)[0]
|
W12BiW12a = W_12*dpotrs(L, np.asfortranarray(W_12*a), lower=1)[0]
|
||||||
ln_B_det = 2*np.sum(np.log(np.diag(L)))
|
ln_B_det = 2*np.sum(np.log(np.diag(L)))
|
||||||
return W12BiW12, ln_B_det
|
return W12BiW12a, ln_B_det
|
||||||
|
|
||||||
def rasm_mode(self, K, MAX_ITER=30):
|
def rasm_mode(self, K, MAX_ITER=40):
|
||||||
"""
|
"""
|
||||||
Rasmussen's numerically stable mode finding
|
Rasmussen's numerically stable mode finding
|
||||||
For nomenclature see Rasmussen & Williams 2006
|
For nomenclature see Rasmussen & Williams 2006
|
||||||
|
|
@ -302,9 +305,10 @@ class Laplace(likelihood):
|
||||||
"""
|
"""
|
||||||
#old_Ki_f = np.zeros((self.N, 1))
|
#old_Ki_f = np.zeros((self.N, 1))
|
||||||
|
|
||||||
#Start f's at zero originally
|
#Start f's at zero originally of if we have gone off track, try restarting
|
||||||
if self.old_Ki_f is None:
|
if self.old_Ki_f is None or self.bad_fhat:
|
||||||
old_Ki_f = np.zeros((self.N, 1))
|
old_Ki_f = np.random.rand(self.N, 1)/50.0
|
||||||
|
#old_Ki_f = self.Y
|
||||||
f = np.dot(K, old_Ki_f)
|
f = np.dot(K, old_Ki_f)
|
||||||
else:
|
else:
|
||||||
#Start at the old best point
|
#Start at the old best point
|
||||||
|
|
@ -318,7 +322,7 @@ class Laplace(likelihood):
|
||||||
return -0.5*np.dot(Ki_f.T, f) + self.noise_model.logpdf(f, self.data, extra_data=self.extra_data)
|
return -0.5*np.dot(Ki_f.T, f) + self.noise_model.logpdf(f, self.data, extra_data=self.extra_data)
|
||||||
|
|
||||||
difference = np.inf
|
difference = np.inf
|
||||||
epsilon = 1e-5
|
epsilon = 1e-7
|
||||||
#step_size = 1
|
#step_size = 1
|
||||||
#rs = 0
|
#rs = 0
|
||||||
i = 0
|
i = 0
|
||||||
|
|
@ -381,14 +385,20 @@ class Laplace(likelihood):
|
||||||
|
|
||||||
#difference = abs(new_obj - old_obj)
|
#difference = abs(new_obj - old_obj)
|
||||||
#old_obj = new_obj.copy()
|
#old_obj = new_obj.copy()
|
||||||
difference = np.abs(np.sum(f - f_old))
|
difference = np.abs(np.sum(f - f_old)) + np.abs(np.sum(Ki_f - old_Ki_f))
|
||||||
#difference = np.abs(np.sum(Ki_f - old_Ki_f))
|
#difference = np.abs(np.sum(Ki_f - old_Ki_f))/np.float(self.N)
|
||||||
old_Ki_f = Ki_f.copy()
|
old_Ki_f = Ki_f.copy()
|
||||||
i += 1
|
i += 1
|
||||||
|
|
||||||
self.old_Ki_f = old_Ki_f.copy()
|
self.old_Ki_f = old_Ki_f.copy()
|
||||||
|
|
||||||
|
#Warn of bad fits
|
||||||
if difference > epsilon:
|
if difference > epsilon:
|
||||||
print "Not perfect f_hat fit difference: {}".format(difference)
|
self.bad_fhat = True
|
||||||
|
warnings.warn("Not perfect f_hat fit difference: {}".format(difference))
|
||||||
|
elif self.bad_fhat:
|
||||||
|
self.bad_fhat = False
|
||||||
|
warnings.warn("f_hat now perfect again")
|
||||||
|
|
||||||
self.Ki_f = Ki_f
|
self.Ki_f = Ki_f
|
||||||
return f
|
return f
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue