Fixed 2*variance plotting instead of 2*std plotting, tidied up

This commit is contained in:
Alan Saul 2013-07-09 11:35:42 +01:00
parent 2a366619b3
commit ee980227ac
4 changed files with 78 additions and 47 deletions

View file

@ -85,24 +85,78 @@ def v_fail_test():
print(m)
def student_t_f_check():
real_var = 0.1
plt.close('all')
real_std = 0.1
X = np.random.rand(100)[:, None]
Y = np.sin(X*2*np.pi) + np.random.randn(*X.shape)*real_var
noise = np.random.randn(*X.shape)*real_std
Y = np.sin(X*2*np.pi) + noise
X_full = X
Y_full = np.sin(X_full)
Y = Y/Y.max()
deg_free = 1000
real_sd = np.sqrt(real_var)
#Y = Y/Y.max()
deg_free = 10000
kernel = GPy.kern.rbf(X.shape[1]) #+ GPy.kern.white(X.shape[1])
real_stu_t_std = np.sqrt(real_var*((deg_free - 2)/float(deg_free)))
#GP
kernelgp = GPy.kern.rbf(X.shape[1]) #+ GPy.kern.white(X.shape[1])
mgp = GPy.models.GP_regression(X, Y, kernel=kernelgp)
mgp.ensure_default_constraints()
mgp.randomize()
mgp.optimize()
t_distribution = GPy.likelihoods.likelihood_functions.student_t(deg_free, sigma2=real_stu_t_std**2)
kernelst = kernelgp.copy()
real_stu_t_std2 = (real_std**2)*((deg_free - 2)/float(deg_free))
t_distribution = GPy.likelihoods.likelihood_functions.student_t(deg_free, sigma2=real_stu_t_std2)
stu_t_likelihood = GPy.likelihoods.Laplace(Y.copy(), t_distribution, opt='rasm')
m = GPy.models.GP(X, stu_t_likelihood, kernel)
m.constrain_positive('t_noise_std2')
m.ensure_default_constraints()
plt.figure(1)
plt.suptitle('Student likelihood')
m = GPy.models.GP(X, stu_t_likelihood, kernelst)
m.constrain_fixed('rbf_var', mgp._get_params()[0])
m.constrain_fixed('rbf_len', mgp._get_params()[1])
m.update_likelihood_approximation()
print "T std2 {} converted from original data, LL: {}".format(real_stu_t_std2, m.log_likelihood())
plt.subplot(221)
m.plot()
plt.title('Student t original data noise')
#Fix student t noise variance to same a GP
gp_noise = mgp._get_params()[2]
m['t_noise_std2'] = gp_noise
m.update_likelihood_approximation()
print "T std2 {} same as GP noise, LL: {}".format(gp_noise, m.log_likelihood())
plt.subplot(222)
m.plot()
plt.title('Student t GP noise')
#Fix student t noise to variance converted from the GP
real_stu_t_std2gp = (gp_noise)*((deg_free - 2)/float(deg_free))
m['t_noise_std2'] = real_stu_t_std2gp
m.update_likelihood_approximation()
print "T std2 {} converted to student t noise from GP noise, LL: {}".format(m.likelihood.likelihood_function.sigma2, m.log_likelihood())
plt.subplot(223)
m.plot()
plt.title('Student t GP noise converted')
m.constrain_positive('t_noise_std2')
m.randomize()
m.update_likelihood_approximation()
m.optimize()
print "T std2 {} var {} after optimising, LL: {}".format(m.likelihood.likelihood_function.sigma2, m.likelihood.likelihood_function.variance, m.log_likelihood())
plt.subplot(224)
m.plot()
plt.title('Student t optimised')
plt.figure(2)
print "GP noise {} after optimising, LL: {}".format(gp_noise, mgp.log_likelihood())
plt.suptitle('Gaussian likelihood optimised')
mgp.plot()
print "Real std: {}".format(real_std)
print "Real variance {}".format(real_std**2)
import ipdb; ipdb.set_trace() ### XXX BREAKPOINT
return m
def debug_student_t_noise_approx():
plot = False
@ -218,16 +272,16 @@ def student_t_approx():
"""
Example of regressing with a student t likelihood
"""
real_var = 0.2
real_std = 0.1
#Start a function, any function
X = np.linspace(0.0, 10.0, 30)[:, None]
Y = np.sin(X) + np.random.randn(*X.shape)*real_var
X = np.linspace(0.0, 10.0, 50)[:, None]
Y = np.sin(X) + np.random.randn(*X.shape)*real_std
Yc = Y.copy()
X_full = np.linspace(0.0, 10.0, 500)[:, None]
Y_full = np.sin(X_full)
#Y = Y/Y.max()
Y = Y/Y.max()
Yc[10] += 100
Yc[25] += 10
@ -238,10 +292,9 @@ def student_t_approx():
#Add student t random noise to datapoints
deg_free = 8
real_sd = np.sqrt(real_var)
print "Real noise: ", real_sd
print "Real noise: ", real_std
initial_var_guess = 0.01
initial_var_guess = 0.1
#t_rv = t(deg_free, loc=0, scale=real_var)
#noise = t_rvrvs(size=Y.shape)
#Y += noise
@ -293,7 +346,7 @@ def student_t_approx():
plt.figure(2)
plt.suptitle('Student-t likelihood')
edited_real_sd = real_sd #initial_var_guess
edited_real_sd = real_std #initial_var_guess
print "Clean student t, rasm"
t_distribution = GPy.likelihoods.likelihood_functions.student_t(deg_free, sigma2=edited_real_sd)
@ -301,6 +354,7 @@ def student_t_approx():
m = GPy.models.GP(X, stu_t_likelihood, kernel6)
m.ensure_default_constraints()
m.constrain_positive('t_noise')
m.randomize()
m.update_likelihood_approximation()
m.optimize()
print(m)
@ -316,6 +370,7 @@ def student_t_approx():
m = GPy.models.GP(X, corrupt_stu_t_likelihood, kernel4)
m.ensure_default_constraints()
m.constrain_positive('t_noise')
m.randomize()
m.update_likelihood_approximation()
m.optimize()
print(m)

View file

@ -89,7 +89,7 @@ class Laplace(likelihood):
expl = 0.5*expl_a + 0.5*expl_b # Might need to be -?
dL_dthetaK_exp = dK_dthetaK(expl, X)
dL_dthetaK_imp = dK_dthetaK(impl, X)
print "dL_dthetaK_exp: {} dL_dthetaK_implicit: {}".format(dL_dthetaK_exp, dL_dthetaK_imp)
#print "dL_dthetaK_exp: {} dL_dthetaK_implicit: {}".format(dL_dthetaK_exp, dL_dthetaK_imp)
dL_dthetaK = dL_dthetaK_exp + dL_dthetaK_imp
return dL_dthetaK

View file

@ -193,16 +193,11 @@ class student_t(likelihood_function):
"""
assert y.shape == f.shape
e = y - f
A = gammaln((self.v + 1) * 0.5)
B = -gammaln(self.v * 0.5)
C = - 0.5*np.log(self.sigma2 * self.v * np.pi)
D = (-(self.v + 1)*0.5)*np.log(1 + (e**2)/(self.v*self.sigma2))
objective = (+ gammaln((self.v + 1) * 0.5)
- gammaln(self.v * 0.5)
- 0.5*np.log(self.sigma2 * self.v * np.pi)
+ (-(self.v + 1)*0.5)*np.log(1 + ((e**2)/self.sigma2)/np.float(self.v))
)
#print "A: {} B: {} C: {} D: {} obj: {}".format(A,B,C,D.sum(),objective.sum())
return np.sum(objective)
def dlik_df(self, y, f, extra_data=None):
@ -266,15 +261,6 @@ class student_t(likelihood_function):
"""
assert y.shape == f.shape
e = y - f
#sigma = np.sqrt(self.sigma2)
#dlik_dsigma = ( - (1/sigma) +
#((1+self.v)*(e**2))/((sigma*self.sigma2)*self.v*(1 + ((e**2) / (self.sigma2*self.v)) ) )
#)
#dlik_dsigma = ( - 1 +
#((1+self.v)*(e**2))/((self.sigma2*self.sigma2)*self.v*(1 + ((e**2) / (self.sigma2*self.v)) ) )
#)
#dlik_dsigma = (((self.v + 1)*(e**2))/((e**2) + self.v*(self.sigma**2))) - 1
#dlik_dsigma = (self.v*((e**2)-self.sigma2))/(sigma*((e**2)+self.sigma2*self.v))
dlik_dsigma = self.v*(e**2 - self.sigma2)/(2*self.sigma2*(self.sigma2*self.v + e**2))
return dlik_dsigma
@ -286,10 +272,6 @@ class student_t(likelihood_function):
"""
assert y.shape == f.shape
e = y - f
#sigma = np.sqrt(self.sigma2)
#dlik_grad_dsigma = ((-2*sigma*self.v*(self.v + 1)*e) #2 might not want to be here?
#/ ((self.v*self.sigma2 + e**2)**2)
#)
dlik_grad_dsigma = (-self.v*(self.v+1)*e)/((self.sigma2*self.v + e**2)**2)
return dlik_grad_dsigma
@ -301,12 +283,6 @@ class student_t(likelihood_function):
"""
assert y.shape == f.shape
e = y - f
#sigma = np.sqrt(self.sigma2)
#dlik_hess_dsigma = ( (2*sigma*self.v*(self.v + 1)*(self.sigma2*self.v - 3*(e**2))) /
#((e**2 + self.sigma2*self.v)**3)
#)
#dlik_hess_dsigma = ( 2*(self.v + 1)*self.v*(self.sigma**2)*((e**2) + (self.v*(self.sigma**2)) - 4*(e**2))
#/ ((e**2 + (self.sigma**2)*self.v)**3) )
dlik_hess_dsigma = ( (self.v*(self.v+1)*(self.sigma2*self.v - 3*(e**2)))
/ (self.sigma2*self.v + (e**2))**3
)
@ -344,8 +320,8 @@ class student_t(likelihood_function):
#Now we have an analytical solution for the variances of the distribution p(y*|f*)p(f*) around our test points but we now
#need the 95 and 5 percentiles.
#FIXME: Hack, just pretend p(y*|f*)p(f*) is a gaussian and use the gaussian's percentiles
p_025 = mu - 2.*true_var
p_975 = mu + 2.*true_var
p_025 = mu - 2.*np.sqrt(true_var)
p_975 = mu + 2.*np.sqrt(true_var)
return mu, np.nan*mu, p_025, p_975

View file

@ -152,7 +152,7 @@ class GP(model):
else:
dL_dthetaL = self.likelihood._gradients(partial=np.diag(self.dL_dK))
#print "Stacked dL_dthetaK, dL_dthetaL: ", np.hstack((dL_dthetaK, dL_dthetaL))
print "dL_dthetaK: {} dL_dthetaL: {}".format(dL_dthetaK, dL_dthetaL)
#print "dL_dthetaK: {} dL_dthetaL: {}".format(dL_dthetaK, dL_dthetaL)
return np.hstack((dL_dthetaK, dL_dthetaL))
#return np.hstack((self.kern.dK_dtheta(dL_dK=self.dL_dK, X=self.X), self.likelihood._gradients(partial=np.diag(self.dL_dK))))