Changed some parameters of the laplace, tidied up examples

This commit is contained in:
Alan Saul 2013-11-28 15:23:39 +00:00
parent 50e9034a6d
commit 0a43329150
2 changed files with 105 additions and 96 deletions

View file

@ -2,22 +2,21 @@ import GPy
import numpy as np import numpy as np
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
from GPy.util import datasets from GPy.util import datasets
#np.random.seed(1)
def student_t_approx(): def student_t_approx(optimize=True, plot=True):
""" """
Example of regressing with a student t likelihood Example of regressing with a student t likelihood using Laplace
""" """
real_std = 0.1 real_std = 0.1
#Start a function, any function #Start a function, any function
X = np.linspace(0.0, np.pi*2, 100)[:, None] X = np.linspace(0.0, np.pi*2, 100)[:, None]
Y = np.sin(X) + np.random.randn(*X.shape)*real_std Y = np.sin(X) + np.random.randn(*X.shape)*real_std
Y = Y/Y.max()
Yc = Y.copy() Yc = Y.copy()
X_full = np.linspace(0.0, np.pi*2, 500)[:, None] X_full = np.linspace(0.0, np.pi*2, 500)[:, None]
Y_full = np.sin(X_full) Y_full = np.sin(X_full)
Y_full = Y_full/Y_full.max()
Y = Y/Y.max()
#Slightly noisy data #Slightly noisy data
Yc[75:80] += 1 Yc[75:80] += 1
@ -34,94 +33,93 @@ def student_t_approx():
deg_free = 5 deg_free = 5
print "Real noise: ", real_std print "Real noise: ", real_std
initial_var_guess = 0.5 initial_var_guess = 0.5
edited_real_sd = initial_var_guess
#t_rv = t(deg_free, loc=0, scale=real_var)
#noise = t_rvrvs(size=Y.shape)
#Y += noise
plt.figure(1)
plt.suptitle('Gaussian likelihood')
# Kernel object # Kernel object
kernel1 = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1]) kernel1 = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1])
kernel2 = kernel1.copy() kernel2 = kernel1.copy()
kernel3 = kernel1.copy() kernel3 = kernel1.copy()
kernel4 = kernel1.copy() kernel4 = kernel1.copy()
kernel5 = kernel1.copy()
kernel6 = kernel1.copy()
print "Clean Gaussian" #Gaussian GP model on clean data
#A GP should completely break down due to the points as they get a lot of weight m1 = GPy.models.GPRegression(X, Y.copy(), kernel=kernel1)
# create simple GP model
m = GPy.models.GPRegression(X, Y, kernel=kernel1)
# optimize # optimize
m.ensure_default_constraints() m1.ensure_default_constraints()
m.constrain_fixed('white', 1e-4) m1.constrain_fixed('white', 1e-5)
m.randomize() m1.randomize()
m.optimize()
# plot
ax = plt.subplot(211)
m.plot(ax=ax)
plt.plot(X_full, Y_full)
plt.ylim(-1.5, 1.5)
plt.title('Gaussian clean')
print m
#Corrupt #Gaussian GP model on corrupt data
print "Corrupt Gaussian" m2 = GPy.models.GPRegression(X, Yc.copy(), kernel=kernel2)
m = GPy.models.GPRegression(X, Yc, kernel=kernel2) m2.ensure_default_constraints()
m.ensure_default_constraints() m2.constrain_fixed('white', 1e-5)
m.constrain_fixed('white', 1e-4) m2.randomize()
m.randomize()
m.optimize()
ax = plt.subplot(212)
m.plot(ax=ax)
plt.plot(X_full, Y_full)
plt.ylim(-1.5, 1.5)
plt.title('Gaussian corrupt')
print m
plt.figure(2) #Student t GP model on clean data
plt.suptitle('Student-t likelihood')
edited_real_sd = initial_var_guess
print "Clean student t, rasm"
t_distribution = GPy.likelihoods.noise_model_constructors.student_t(deg_free=deg_free, sigma2=edited_real_sd) t_distribution = GPy.likelihoods.noise_model_constructors.student_t(deg_free=deg_free, sigma2=edited_real_sd)
stu_t_likelihood = GPy.likelihoods.Laplace(Y.copy(), t_distribution) stu_t_likelihood = GPy.likelihoods.Laplace(Y.copy(), t_distribution)
m = GPy.models.GPRegression(X, Y.copy(), kernel6, likelihood=stu_t_likelihood) m3 = GPy.models.GPRegression(X, Y.copy(), kernel3, likelihood=stu_t_likelihood)
m.ensure_default_constraints() m3.ensure_default_constraints()
m.constrain_positive('t_noise') m3.constrain_bounded('t_noise', 1e-6, 10.)
m.constrain_fixed('white', 1e-4) m3.constrain_fixed('white', 1e-5)
m.randomize() m3.randomize()
#m.update_likelihood_approximation()
m.optimize()
print(m)
ax = plt.subplot(211)
m.plot(ax=ax)
plt.plot(X_full, Y_full)
plt.ylim(-1.5, 1.5)
plt.title('Student-t rasm clean')
print "Corrupt student t, rasm" #Student t GP model on corrupt data
t_distribution = GPy.likelihoods.noise_model_constructors.student_t(deg_free=deg_free, sigma2=edited_real_sd) t_distribution = GPy.likelihoods.noise_model_constructors.student_t(deg_free=deg_free, sigma2=edited_real_sd)
corrupt_stu_t_likelihood = GPy.likelihoods.Laplace(Yc.copy(), t_distribution) corrupt_stu_t_likelihood = GPy.likelihoods.Laplace(Yc.copy(), t_distribution)
m = GPy.models.GPRegression(X, Yc.copy(), kernel4, likelihood=corrupt_stu_t_likelihood) m4 = GPy.models.GPRegression(X, Yc.copy(), kernel4, likelihood=corrupt_stu_t_likelihood)
m.ensure_default_constraints() m4.ensure_default_constraints()
m.constrain_bounded('t_noise', 1e-6, 10.) m4.constrain_bounded('t_noise', 1e-6, 10.)
m.constrain_fixed('white', 1e-4) m4.constrain_fixed('white', 1e-5)
m.randomize() m4.randomize()
for a in range(1):
m.randomize()
m_start = m.copy()
print m
m.optimize('scg', messages=1)
print(m)
ax = plt.subplot(212)
m.plot(ax=ax)
plt.plot(X_full, Y_full)
plt.ylim(-1.5, 1.5)
plt.title('Student-t rasm corrupt')
return m if optimize:
optimizer='scg'
print "Clean Gaussian"
m1.optimize(optimizer, messages=1)
print "Corrupt Gaussian"
m2.optimize(optimizer, messages=1)
print "Clean student t"
m3.optimize(optimizer, messages=1)
print "Corrupt student t"
m4.optimize(optimizer, messages=1)
if False:
print m1
print m3
plt.figure(3)
plt.scatter(X, m1.likelihood.Y, c='g')
plt.scatter(X, m3.likelihood.Y, c='r')
if plot:
plt.figure(1)
plt.suptitle('Gaussian likelihood')
ax = plt.subplot(211)
m1.plot(ax=ax)
plt.plot(X_full, Y_full)
plt.ylim(-1.5, 1.5)
plt.title('Gaussian clean')
ax = plt.subplot(212)
m2.plot(ax=ax)
plt.plot(X_full, Y_full)
plt.ylim(-1.5, 1.5)
plt.title('Gaussian corrupt')
plt.figure(2)
plt.suptitle('Student-t likelihood')
ax = plt.subplot(211)
m3.plot(ax=ax)
plt.plot(X_full, Y_full)
plt.ylim(-1.5, 1.5)
plt.title('Student-t rasm clean')
ax = plt.subplot(212)
m4.plot(ax=ax)
plt.plot(X_full, Y_full)
plt.ylim(-1.5, 1.5)
plt.title('Student-t rasm corrupt')
return m1, m2, m3, m4
def boston_example(): def boston_example():
import sklearn import sklearn
@ -294,3 +292,4 @@ def precipitation_example():
for n, (train, test) in enumerate(kf): for n, (train, test) in enumerate(kf):
X_train, X_test, Y_train, Y_test = X[train], X[test], Y[train], Y[test] X_train, X_test, Y_train, Y_test = X[train], X[test], Y[train], Y[test]
print "Fold {}".format(n) print "Fold {}".format(n)

View file

@ -15,6 +15,7 @@ import scipy as sp
from likelihood import likelihood from likelihood import likelihood
from ..util.linalg import mdot, jitchol, pddet, dpotrs from ..util.linalg import mdot, jitchol, pddet, dpotrs
from functools import partial as partial_func from functools import partial as partial_func
import warnings
class Laplace(likelihood): class Laplace(likelihood):
"""Laplace approximation to a posterior""" """Laplace approximation to a posterior"""
@ -64,6 +65,7 @@ class Laplace(likelihood):
self.YYT = None self.YYT = None
self.old_Ki_f = None self.old_Ki_f = None
self.bad_fhat = False
def predictive_values(self,mu,var,full_cov,**noise_args): def predictive_values(self,mu,var,full_cov,**noise_args):
if full_cov: if full_cov:
@ -198,18 +200,16 @@ class Laplace(likelihood):
Y_tilde = Wi*self.Ki_f + self.f_hat Y_tilde = Wi*self.Ki_f + self.f_hat
self.Wi_K_i = self.W12BiW12 self.Wi_K_i = self.W12BiW12
self.ln_det_Wi_K = pddet(self.Sigma_tilde + self.K) ln_det_Wi_K = pddet(self.Sigma_tilde + self.K)
self.lik = self.noise_model.logpdf(self.f_hat, self.data, extra_data=self.extra_data) lik = self.noise_model.logpdf(self.f_hat, self.data, extra_data=self.extra_data)
self.y_Wi_Ki_i_y = mdot(Y_tilde.T, self.Wi_K_i, Y_tilde) y_Wi_K_i_y = mdot(Y_tilde.T, self.Wi_K_i, Y_tilde)
Z_tilde = (+ self.lik Z_tilde = (+ lik
- 0.5*self.ln_B_det - 0.5*self.ln_B_det
+ 0.5*self.ln_det_Wi_K + 0.5*ln_det_Wi_K
- 0.5*self.f_Ki_f - 0.5*self.f_Ki_f
+ 0.5*self.y_Wi_Ki_i_y + 0.5*y_Wi_K_i_y
) )
#print "Term, {}, {}, {}, {}, {}".format(self.lik, - 0.5*self.ln_B_det, + 0.5*self.ln_det_Wi_K, - 0.5*self.f_Ki_f, + 0.5*self.y_Wi_Ki_i_y)
#Convert to float as its (1, 1) and Z must be a scalar #Convert to float as its (1, 1) and Z must be a scalar
self.Z = np.float64(Z_tilde) self.Z = np.float64(Z_tilde)
self.Y = Y_tilde self.Y = Y_tilde
@ -247,7 +247,10 @@ class Laplace(likelihood):
#At this point get the hessian matrix (or vector as W is diagonal) #At this point get the hessian matrix (or vector as W is diagonal)
self.W = -self.noise_model.d2logpdf_df2(self.f_hat, self.data, extra_data=self.extra_data) self.W = -self.noise_model.d2logpdf_df2(self.f_hat, self.data, extra_data=self.extra_data)
#TODO: Could save on computation when using rasm by returning these, means it isn't just a "mode finder" though if not self.noise_model.log_concave:
#print "Under 1e-10: {}".format(np.sum(self.W < 1e-6))
self.W[self.W < 1e-6] = 1e-6 # FIXME-HACK: This is a hack since GPy can't handle negative variances which can occur
self.W12BiW12, self.ln_B_det = self._compute_B_statistics(self.K, self.W, np.eye(self.N)) self.W12BiW12, self.ln_B_det = self._compute_B_statistics(self.K, self.W, np.eye(self.N))
self.Ki_f = self.Ki_f self.Ki_f = self.Ki_f
@ -283,11 +286,11 @@ class Laplace(likelihood):
except: except:
import ipdb; ipdb.set_trace() import ipdb; ipdb.set_trace()
W12BiW12 = W_12*dpotrs(L, np.asfortranarray(W_12*a), lower=1)[0] W12BiW12a = W_12*dpotrs(L, np.asfortranarray(W_12*a), lower=1)[0]
ln_B_det = 2*np.sum(np.log(np.diag(L))) ln_B_det = 2*np.sum(np.log(np.diag(L)))
return W12BiW12, ln_B_det return W12BiW12a, ln_B_det
def rasm_mode(self, K, MAX_ITER=30): def rasm_mode(self, K, MAX_ITER=40):
""" """
Rasmussen's numerically stable mode finding Rasmussen's numerically stable mode finding
For nomenclature see Rasmussen & Williams 2006 For nomenclature see Rasmussen & Williams 2006
@ -302,9 +305,10 @@ class Laplace(likelihood):
""" """
#old_Ki_f = np.zeros((self.N, 1)) #old_Ki_f = np.zeros((self.N, 1))
#Start f's at zero originally #Start f's at zero originally of if we have gone off track, try restarting
if self.old_Ki_f is None: if self.old_Ki_f is None or self.bad_fhat:
old_Ki_f = np.zeros((self.N, 1)) old_Ki_f = np.random.rand(self.N, 1)/50.0
#old_Ki_f = self.Y
f = np.dot(K, old_Ki_f) f = np.dot(K, old_Ki_f)
else: else:
#Start at the old best point #Start at the old best point
@ -318,7 +322,7 @@ class Laplace(likelihood):
return -0.5*np.dot(Ki_f.T, f) + self.noise_model.logpdf(f, self.data, extra_data=self.extra_data) return -0.5*np.dot(Ki_f.T, f) + self.noise_model.logpdf(f, self.data, extra_data=self.extra_data)
difference = np.inf difference = np.inf
epsilon = 1e-5 epsilon = 1e-7
#step_size = 1 #step_size = 1
#rs = 0 #rs = 0
i = 0 i = 0
@ -381,14 +385,20 @@ class Laplace(likelihood):
#difference = abs(new_obj - old_obj) #difference = abs(new_obj - old_obj)
#old_obj = new_obj.copy() #old_obj = new_obj.copy()
difference = np.abs(np.sum(f - f_old)) difference = np.abs(np.sum(f - f_old)) + np.abs(np.sum(Ki_f - old_Ki_f))
#difference = np.abs(np.sum(Ki_f - old_Ki_f)) #difference = np.abs(np.sum(Ki_f - old_Ki_f))/np.float(self.N)
old_Ki_f = Ki_f.copy() old_Ki_f = Ki_f.copy()
i += 1 i += 1
self.old_Ki_f = old_Ki_f.copy() self.old_Ki_f = old_Ki_f.copy()
#Warn of bad fits
if difference > epsilon: if difference > epsilon:
print "Not perfect f_hat fit difference: {}".format(difference) self.bad_fhat = True
warnings.warn("Not perfect f_hat fit difference: {}".format(difference))
elif self.bad_fhat:
self.bad_fhat = False
warnings.warn("f_hat now perfect again")
self.Ki_f = Ki_f self.Ki_f = Ki_f
return f return f