mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-04-27 05:46:24 +02:00
185 lines
5.5 KiB
Python
import GPy
|
|
import numpy as np
|
|
import matplotlib.pyplot as plt
|
|
from scipy.stats import t, norm
|
|
from coxGP.python.likelihoods.Laplace import Laplace
|
|
from coxGP.python.likelihoods.likelihood_function import student_t
|
|
|
|
|
|
def student_t_approx():
|
|
"""
|
|
Example of regressing with a student t likelihood
|
|
"""
|
|
real_var = 0.1
|
|
#Start a function, any function
|
|
X = np.linspace(0.0, 10.0, 30)[:, None]
|
|
Y = np.sin(X) + np.random.randn(*X.shape)*real_var
|
|
Yc = Y.copy()
|
|
|
|
X_full = np.linspace(0.0, 10.0, 500)[:, None]
|
|
Y_full = np.sin(X_full)
|
|
|
|
#Y = Y/Y.max()
|
|
|
|
Yc[10] += 100
|
|
Yc[25] += 10
|
|
Yc[23] += 10
|
|
Yc[24] += 10
|
|
#Yc = Yc/Yc.max()
|
|
|
|
#Add student t random noise to datapoints
|
|
deg_free = 10
|
|
real_sd = np.sqrt(real_var)
|
|
#t_rv = t(deg_free, loc=0, scale=real_var)
|
|
#noise = t_rvrvs(size=Y.shape)
|
|
#Y += noise
|
|
|
|
#Add some extreme value noise to some of the datapoints
|
|
#percent_corrupted = 0.15
|
|
#corrupted_datums = int(np.round(Y.shape[0] * percent_corrupted))
|
|
#indices = np.arange(Y.shape[0])
|
|
#np.random.shuffle(indices)
|
|
#corrupted_indices = indices[:corrupted_datums]
|
|
#print corrupted_indices
|
|
#noise = t_rv.rvs(size=(len(corrupted_indices), 1))
|
|
#Y[corrupted_indices] += noise
|
|
plt.figure(1)
|
|
plt.suptitle('Gaussian likelihood')
|
|
# Kernel object
|
|
kernel1 = GPy.kern.rbf(X.shape[1])
|
|
kernel2 = kernel1.copy()
|
|
kernel3 = kernel1.copy()
|
|
kernel4 = kernel1.copy()
|
|
kernel5 = kernel1.copy()
|
|
kernel6 = kernel1.copy()
|
|
|
|
print "Clean Gaussian"
|
|
#A GP should completely break down due to the points as they get a lot of weight
|
|
# create simple GP model
|
|
m = GPy.models.GP_regression(X, Y, kernel=kernel1)
|
|
# optimize
|
|
m.ensure_default_constraints()
|
|
m.optimize()
|
|
# plot
|
|
plt.subplot(211)
|
|
m.plot()
|
|
plt.plot(X_full, Y_full)
|
|
print m
|
|
|
|
#Corrupt
|
|
print "Corrupt Gaussian"
|
|
m = GPy.models.GP_regression(X, Yc, kernel=kernel2)
|
|
m.ensure_default_constraints()
|
|
m.optimize()
|
|
plt.subplot(212)
|
|
m.plot()
|
|
plt.plot(X_full, Y_full)
|
|
print m
|
|
|
|
plt.figure(2)
|
|
plt.suptitle('Student-t likelihood')
|
|
edited_real_sd = real_sd
|
|
|
|
print "Clean student t, ncg"
|
|
t_distribution = student_t(deg_free, sigma=edited_real_sd)
|
|
stu_t_likelihood = Laplace(Y, t_distribution, rasm=False)
|
|
m = GPy.models.GP(X, stu_t_likelihood, kernel3)
|
|
m.ensure_default_constraints()
|
|
m.update_likelihood_approximation()
|
|
m.optimize()
|
|
print(m)
|
|
plt.subplot(221)
|
|
m.plot()
|
|
plt.plot(X_full, Y_full)
|
|
plt.ylim(-2.5, 2.5)
|
|
|
|
print "Corrupt student t, ncg"
|
|
t_distribution = student_t(deg_free, sigma=edited_real_sd)
|
|
corrupt_stu_t_likelihood = Laplace(Yc.copy(), t_distribution, rasm=False)
|
|
m = GPy.models.GP(X, corrupt_stu_t_likelihood, kernel5)
|
|
m.ensure_default_constraints()
|
|
m.update_likelihood_approximation()
|
|
m.optimize()
|
|
print(m)
|
|
plt.subplot(223)
|
|
m.plot()
|
|
plt.plot(X_full, Y_full)
|
|
plt.ylim(-2.5, 2.5)
|
|
|
|
print "Clean student t, rasm"
|
|
t_distribution = student_t(deg_free, sigma=edited_real_sd)
|
|
stu_t_likelihood = Laplace(Y.copy(), t_distribution, rasm=True)
|
|
m = GPy.models.GP(X, stu_t_likelihood, kernel6)
|
|
m.ensure_default_constraints()
|
|
m.update_likelihood_approximation()
|
|
m.optimize()
|
|
print(m)
|
|
plt.subplot(222)
|
|
m.plot()
|
|
plt.plot(X_full, Y_full)
|
|
plt.ylim(-2.5, 2.5)
|
|
|
|
print "Corrupt student t, rasm"
|
|
t_distribution = student_t(deg_free, sigma=edited_real_sd)
|
|
corrupt_stu_t_likelihood = Laplace(Yc.copy(), t_distribution, rasm=True)
|
|
m = GPy.models.GP(X, corrupt_stu_t_likelihood, kernel4)
|
|
m.ensure_default_constraints()
|
|
m.update_likelihood_approximation()
|
|
m.optimize()
|
|
print(m)
|
|
plt.subplot(224)
|
|
m.plot()
|
|
plt.plot(X_full, Y_full)
|
|
plt.ylim(-2.5, 2.5)
|
|
import ipdb; ipdb.set_trace() ### XXX BREAKPOINT
|
|
|
|
###with a student t distribution, since it has heavy tails it should work well
|
|
###likelihood_function = student_t(deg_free, sigma=real_var)
|
|
###lap = Laplace(Y, likelihood_function)
|
|
###cov = kernel.K(X)
|
|
###lap.fit_full(cov)
|
|
|
|
###test_range = np.arange(0, 10, 0.1)
|
|
###plt.plot(test_range, t_rv.pdf(test_range))
|
|
###for i in xrange(X.shape[0]):
|
|
###mode = lap.f_hat[i]
|
|
###covariance = lap.hess_hat_i[i,i]
|
|
###scaling = np.exp(lap.ln_z_hat)
|
|
###normalised_approx = norm(loc=mode, scale=covariance)
|
|
###print "Normal with mode %f, and variance %f" % (mode, covariance)
|
|
###plt.plot(test_range, scaling*normalised_approx.pdf(test_range))
|
|
###plt.show()
|
|
|
|
return m
|
|
|
|
|
|
def noisy_laplace_approx():
|
|
"""
|
|
Example of regressing with a student t likelihood
|
|
"""
|
|
#Start a function, any function
|
|
X = np.sort(np.random.uniform(0, 15, 70))[:, None]
|
|
Y = np.sin(X)
|
|
|
|
#Add some extreme value noise to some of the datapoints
|
|
percent_corrupted = 0.05
|
|
corrupted_datums = int(np.round(Y.shape[0] * percent_corrupted))
|
|
indices = np.arange(Y.shape[0])
|
|
np.random.shuffle(indices)
|
|
corrupted_indices = indices[:corrupted_datums]
|
|
print corrupted_indices
|
|
noise = np.random.uniform(-10, 10, (len(corrupted_indices), 1))
|
|
Y[corrupted_indices] += noise
|
|
|
|
#A GP should completely break down due to the points as they get a lot of weight
|
|
# create simple GP model
|
|
m = GPy.models.GP_regression(X, Y)
|
|
|
|
# optimize
|
|
m.ensure_default_constraints()
|
|
m.optimize()
|
|
# plot
|
|
m.plot()
|
|
print m
|
|
|
|
#with a student t distribution, since it has heavy tails it should work well
|