mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-06-11 15:15:15 +02:00
Trying to debug kernel parameters learning (fails even when noise fixed)
may be some instablility, seems like it can get it if it starts close
This commit is contained in:
parent
ee980227ac
commit
57001851c4
3 changed files with 110 additions and 23 deletions
|
|
@ -1,6 +1,7 @@
|
|||
import GPy
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
np.random.seed(1)
|
||||
|
||||
def timing():
|
||||
real_var = 0.1
|
||||
|
|
@ -86,17 +87,67 @@ def v_fail_test():
|
|||
|
||||
def student_t_f_check():
|
||||
plt.close('all')
|
||||
real_std = 0.1
|
||||
X = np.random.rand(100)[:, None]
|
||||
X = np.linspace(0, 1, 50)[:, None]
|
||||
real_std = 0.001
|
||||
noise = np.random.randn(*X.shape)*real_std
|
||||
Y = np.sin(X*2*np.pi) + noise
|
||||
deg_free = 1000
|
||||
|
||||
kernelgp = GPy.kern.rbf(X.shape[1]) # + GPy.kern.white(X.shape[1])
|
||||
mgp = GPy.models.GP_regression(X, Y, kernel=kernelgp)
|
||||
mgp.ensure_default_constraints()
|
||||
mgp.randomize()
|
||||
mgp.optimize()
|
||||
print mgp
|
||||
import ipdb; ipdb.set_trace() ### XXX BREAKPOINT
|
||||
|
||||
kernelst = kernelgp.copy()
|
||||
t_distribution = GPy.likelihoods.likelihood_functions.student_t(deg_free, sigma2=1e-5)
|
||||
stu_t_likelihood = GPy.likelihoods.Laplace(Y.copy(), t_distribution, opt='rasm')
|
||||
m = GPy.models.GP(X, stu_t_likelihood, kernelst)
|
||||
m['rbf_v'] = mgp._get_params()[0]
|
||||
m['rbf_l'] = mgp._get_params()[1] + 1
|
||||
m.ensure_default_constraints()
|
||||
m.constrain_positive('t_no')
|
||||
print m
|
||||
plt.figure()
|
||||
plt.subplot(511)
|
||||
m.plot()
|
||||
print m
|
||||
plt.subplot(512)
|
||||
m.optimize(max_f_eval=15)
|
||||
m.plot()
|
||||
print m
|
||||
plt.subplot(513)
|
||||
m.optimize(max_f_eval=15)
|
||||
m.plot()
|
||||
print m
|
||||
plt.subplot(514)
|
||||
m.optimize(max_f_eval=15)
|
||||
m.plot()
|
||||
print m
|
||||
plt.subplot(515)
|
||||
m.optimize()
|
||||
m.plot()
|
||||
print "final optimised student t"
|
||||
print m
|
||||
print "real GP"
|
||||
print mgp
|
||||
|
||||
def student_t_fix_optimise_check():
|
||||
plt.close('all')
|
||||
real_var = 0.1
|
||||
real_std = np.sqrt(real_var)
|
||||
X = np.random.rand(200)[:, None]
|
||||
noise = np.random.randn(*X.shape)*real_std
|
||||
Y = np.sin(X*2*np.pi) + noise
|
||||
X_full = X
|
||||
Y_full = np.sin(X_full)
|
||||
#Y = Y/Y.max()
|
||||
deg_free = 10000
|
||||
deg_free = 1000
|
||||
|
||||
#GP
|
||||
kernelgp = GPy.kern.rbf(X.shape[1]) #+ GPy.kern.white(X.shape[1])
|
||||
kernelgp = GPy.kern.rbf(X.shape[1]) # + GPy.kern.white(X.shape[1])
|
||||
mgp = GPy.models.GP_regression(X, Y, kernel=kernelgp)
|
||||
mgp.ensure_default_constraints()
|
||||
mgp.randomize()
|
||||
|
|
@ -113,10 +164,12 @@ def student_t_f_check():
|
|||
m = GPy.models.GP(X, stu_t_likelihood, kernelst)
|
||||
m.constrain_fixed('rbf_var', mgp._get_params()[0])
|
||||
m.constrain_fixed('rbf_len', mgp._get_params()[1])
|
||||
m.constrain_positive('t_noise')
|
||||
#m.ensure_default_constraints()
|
||||
|
||||
m.update_likelihood_approximation()
|
||||
print "T std2 {} converted from original data, LL: {}".format(real_stu_t_std2, m.log_likelihood())
|
||||
plt.subplot(221)
|
||||
plt.subplot(231)
|
||||
m.plot()
|
||||
plt.title('Student t original data noise')
|
||||
|
||||
|
|
@ -125,7 +178,7 @@ def student_t_f_check():
|
|||
m['t_noise_std2'] = gp_noise
|
||||
m.update_likelihood_approximation()
|
||||
print "T std2 {} same as GP noise, LL: {}".format(gp_noise, m.log_likelihood())
|
||||
plt.subplot(222)
|
||||
plt.subplot(232)
|
||||
m.plot()
|
||||
plt.title('Student t GP noise')
|
||||
|
||||
|
|
@ -134,29 +187,57 @@ def student_t_f_check():
|
|||
m['t_noise_std2'] = real_stu_t_std2gp
|
||||
m.update_likelihood_approximation()
|
||||
print "T std2 {} converted to student t noise from GP noise, LL: {}".format(m.likelihood.likelihood_function.sigma2, m.log_likelihood())
|
||||
plt.subplot(223)
|
||||
plt.subplot(233)
|
||||
m.plot()
|
||||
plt.title('Student t GP noise converted')
|
||||
|
||||
m.constrain_positive('t_noise_std2')
|
||||
m.randomize()
|
||||
m.update_likelihood_approximation()
|
||||
plt.subplot(234)
|
||||
m.plot()
|
||||
plt.title('Student t fixed rbf')
|
||||
m.optimize()
|
||||
print "T std2 {} var {} after optimising, LL: {}".format(m.likelihood.likelihood_function.sigma2, m.likelihood.likelihood_function.variance, m.log_likelihood())
|
||||
plt.subplot(224)
|
||||
plt.subplot(235)
|
||||
m.plot()
|
||||
plt.title('Student t optimised')
|
||||
plt.title('Student t fixed rbf optimised')
|
||||
|
||||
plt.figure(2)
|
||||
mrbf = m.copy()
|
||||
mrbf.unconstrain('')
|
||||
mrbf.constrain_fixed('t_noise', m.likelihood.likelihood_function.sigma2)
|
||||
gp_var = mgp._get_params()[0]
|
||||
gp_len = mgp._get_params()[1]
|
||||
mrbf.constrain_fixed('rbf_var', gp_var)
|
||||
mrbf.constrain_positive('rbf_len')
|
||||
mrbf.randomize()
|
||||
print "Before optimize"
|
||||
print mrbf
|
||||
import ipdb; ipdb.set_trace() ### XXX BREAKPOINT
|
||||
mrbf.checkgrad(verbose=1)
|
||||
plt.subplot(121)
|
||||
mrbf.plot()
|
||||
plt.title('Student t fixed noise')
|
||||
#mrbf.optimize()
|
||||
print "After optimize"
|
||||
print mrbf
|
||||
plt.subplot(122)
|
||||
mrbf.plot()
|
||||
plt.title('Student t fixed noise optimized')
|
||||
print mrbf
|
||||
|
||||
plt.figure(3)
|
||||
print "GP noise {} after optimising, LL: {}".format(gp_noise, mgp.log_likelihood())
|
||||
plt.suptitle('Gaussian likelihood optimised')
|
||||
mgp.plot()
|
||||
print "Real std: {}".format(real_std)
|
||||
print "Real variance {}".format(real_std**2)
|
||||
|
||||
import ipdb; ipdb.set_trace() ### XXX BREAKPOINT
|
||||
#import ipdb; ipdb.set_trace() ### XXX BREAKPOINT
|
||||
|
||||
return m
|
||||
print "Len should be: {}".format(gp_len)
|
||||
return mrbf
|
||||
|
||||
def debug_student_t_noise_approx():
|
||||
plot = False
|
||||
|
|
|
|||
|
|
@ -89,7 +89,7 @@ class Laplace(likelihood):
|
|||
expl = 0.5*expl_a + 0.5*expl_b # Might need to be -?
|
||||
dL_dthetaK_exp = dK_dthetaK(expl, X)
|
||||
dL_dthetaK_imp = dK_dthetaK(impl, X)
|
||||
#print "dL_dthetaK_exp: {} dL_dthetaK_implicit: {}".format(dL_dthetaK_exp, dL_dthetaK_imp)
|
||||
print "dL_dthetaK_exp: {} dL_dthetaK_implicit: {}".format(dL_dthetaK_exp, dL_dthetaK_imp)
|
||||
dL_dthetaK = dL_dthetaK_exp + dL_dthetaK_imp
|
||||
return dL_dthetaK
|
||||
|
||||
|
|
@ -290,10 +290,12 @@ class Laplace(likelihood):
|
|||
:MAX_RESTART: Maximum number of restarts (reducing step_size) before forcing finish of optimisation
|
||||
:returns: f_mode
|
||||
"""
|
||||
if self.old_a is None:
|
||||
old_a = np.zeros((self.N, 1))
|
||||
else:
|
||||
old_a = self.old_a
|
||||
old_a = np.zeros((self.N, 1))
|
||||
#old_a = None
|
||||
#if self.old_a is None:
|
||||
#old_a = np.zeros((self.N, 1))
|
||||
#else:
|
||||
#old_a = self.old_a
|
||||
|
||||
f = np.dot(self.K, old_a)
|
||||
self.f = f
|
||||
|
|
@ -308,8 +310,6 @@ class Laplace(likelihood):
|
|||
step_size = 1
|
||||
rs = 0
|
||||
i = 0
|
||||
#if self.likelihood_function.sigma < 0.001:
|
||||
#import ipdb; ipdb.set_trace() ### XXX BREAKPOINT
|
||||
while difference > epsilon and i < MAX_ITER and rs < MAX_RESTART:
|
||||
W = -self.likelihood_function.d2lik_d2f(self.data, f, extra_data=self.extra_data)
|
||||
if not self.likelihood_function.log_concave:
|
||||
|
|
@ -371,8 +371,10 @@ class Laplace(likelihood):
|
|||
old_a = self.a #a
|
||||
i += 1
|
||||
|
||||
self.old_a = old_a
|
||||
#print "Positive difference obj: ", np.float(difference)
|
||||
print "Iterations: {}, Step size reductions: {}, Final_difference: {}, step_size: {}".format(i, rs, difference, step_size)
|
||||
#print "Iterations: {}, Step size reductions: {}, Final_difference: {}, step_size: {}".format(i, rs, difference, step_size)
|
||||
print "Iterations: {}, Final_difference: {}".format(i, difference)
|
||||
#self.a = a
|
||||
#self.B, self.B_chol, self.W_12 = B, L, W_12
|
||||
#self.Bi, _, _, B_det = pdinv(self.B)
|
||||
|
|
|
|||
|
|
@ -132,7 +132,11 @@ class GP(model):
|
|||
model for a new variable Y* = v_tilde/tau_tilde, with a covariance
|
||||
matrix K* = K + diag(1./tau_tilde) plus a normalization term.
|
||||
"""
|
||||
if isinstance(self.likelihood, Laplace):
|
||||
self.likelihood.fit_full(self.kern.K(self.X))
|
||||
self.likelihood._set_params(self.likelihood._get_params())
|
||||
l = -0.5 * self.D * self.K_logdet + self._model_fit_term() + self.likelihood.Z
|
||||
print "K_ldet: {} mft: {} Z: {}".format(self.K_logdet, self._model_fit_term(), self.likelihood.Z)
|
||||
return l
|
||||
|
||||
def _log_likelihood_gradients(self):
|
||||
|
|
@ -142,12 +146,12 @@ class GP(model):
|
|||
Note, we use the chain rule: dL_dtheta = dL_dK * d_K_dtheta
|
||||
"""
|
||||
dL_dthetaK = self.kern.dK_dtheta(dL_dK=self.dL_dK, X=self.X)
|
||||
#print "dL_dthetaK should be: ", dL_dthetaK
|
||||
print "dL_dthetaK should be: ", dL_dthetaK
|
||||
if isinstance(self.likelihood, Laplace):
|
||||
#self.likelihood.fit_full(self.kern.K(self.X))
|
||||
#self.likelihood._set_params(self.likelihood._get_params())
|
||||
self.likelihood.fit_full(self.kern.K(self.X))
|
||||
self.likelihood._set_params(self.likelihood._get_params())
|
||||
dK_dthetaK = self.kern.dK_dtheta
|
||||
dL_dthetaK = self.likelihood._Kgradients(dK_dthetaK, self.X)
|
||||
dL_dthetaK = self.likelihood._Kgradients(dK_dthetaK, self.X.copy())
|
||||
dL_dthetaL = self.likelihood._gradients(partial=np.diag(self.dL_dK))
|
||||
else:
|
||||
dL_dthetaL = self.likelihood._gradients(partial=np.diag(self.dL_dK))
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue