Updated boston tests (more folds, allow a bias as the datasets

are not normalized once split) and more folds. Tweaked some
laplace line search parameters, added basis tests for ep
This commit is contained in:
Alan Saul 2013-10-23 14:39:33 +01:00
parent 6678bca011
commit 3e0b597486
3 changed files with 75 additions and 36 deletions

View file

@ -193,6 +193,8 @@ def gaussian_f_check():
def boston_example():
import sklearn
from sklearn.cross_validation import KFold
optimizer='bfgs'
messages=0
data = datasets.boston_housing()
X = data['X'].copy()
Y = data['Y'].copy()
@ -200,9 +202,9 @@ def boston_example():
X = X/X.std(axis=0)
Y = Y-Y.mean()
Y = Y/Y.std()
num_folds = 10
num_folds = 30
kf = KFold(len(Y), n_folds=num_folds, indices=True)
score_folds = np.zeros((6, num_folds))
score_folds = np.zeros((7, num_folds))
def rmse(Y, Ystar):
return np.sqrt(np.mean((Y-Ystar)**2))
for n, (train, test) in enumerate(kf):
@ -212,18 +214,19 @@ def boston_example():
noise = 1e-1 #np.exp(-2)
rbf_len = 0.5
data_axis_plot = 4
plot = True
plot = False
kernelstu = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1]) + GPy.kern.bias(X.shape[1])
kernelgp = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1]) + GPy.kern.bias(X.shape[1])
#Gaussian GP
print "Gauss GP"
kernelgp = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1])
mgp = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelgp)
mgp = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelgp.copy())
mgp.ensure_default_constraints()
mgp.constrain_fixed('white', 1e-5)
mgp['rbf_len'] = rbf_len
mgp['noise'] = noise
print mgp
mgp.optimize(messages=1)
mgp.optimize(optimizer=optimizer,messages=messages)
Y_test_pred = mgp.predict(X_test)
score_folds[0, n] = rmse(Y_test, Y_test_pred[0])
print mgp
@ -235,11 +238,10 @@ def boston_example():
plt.title('GP gauss')
print "Gaussian Laplace GP"
kernelstu = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1])
N, D = Y_train.shape
g_distribution = GPy.likelihoods.noise_model_constructors.gaussian(variance=noise, N=N, D=D)
g_likelihood = GPy.likelihoods.Laplace(Y_train.copy(), g_distribution)
mg = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelstu, likelihood=g_likelihood)
mg = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelstu.copy(), likelihood=g_likelihood)
mg.ensure_default_constraints()
mg.constrain_positive('noise_variance')
mg.constrain_fixed('white', 1e-5)
@ -247,7 +249,7 @@ def boston_example():
mg['noise'] = noise
print mg
try:
mg.optimize(messages=1)
mg.optimize(optimizer=optimizer, messages=messages)
except Exception:
print "Blew up"
Y_test_pred = mg.predict(X_test)
@ -263,10 +265,9 @@ def boston_example():
#Student T
deg_free = 1
print "Student-T GP {}df".format(deg_free)
kernelstu = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1])
t_distribution = GPy.likelihoods.noise_model_constructors.student_t(deg_free=deg_free, sigma2=noise)
stu_t_likelihood = GPy.likelihoods.Laplace(Y_train.copy(), t_distribution)
mstu_t = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelstu, likelihood=stu_t_likelihood)
mstu_t = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelstu.copy(), likelihood=stu_t_likelihood)
mstu_t.ensure_default_constraints()
mstu_t.constrain_fixed('white', 1e-5)
mstu_t.constrain_bounded('t_noise', 0.0001, 1000)
@ -274,7 +275,7 @@ def boston_example():
mstu_t['t_noise'] = noise
print mstu_t
try:
mstu_t.optimize(messages=1)
mstu_t.optimize(optimizer=optimizer, messages=messages)
except Exception:
print "Blew up"
Y_test_pred = mstu_t.predict(X_test)
@ -287,12 +288,11 @@ def boston_example():
plt.scatter(X_test[:, data_axis_plot], Y_test, c='r', marker='x')
plt.title('Stu t {}df'.format(deg_free))
deg_free = 2
deg_free = 8
print "Student-T GP {}df".format(deg_free)
kernelstu = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1])
t_distribution = GPy.likelihoods.noise_model_constructors.student_t(deg_free=deg_free, sigma2=noise)
stu_t_likelihood = GPy.likelihoods.Laplace(Y_train.copy(), t_distribution)
mstu_t = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelstu, likelihood=stu_t_likelihood)
mstu_t = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelstu.copy(), likelihood=stu_t_likelihood)
mstu_t.ensure_default_constraints()
mstu_t.constrain_fixed('white', 1e-5)
mstu_t.constrain_bounded('t_noise', 0.0001, 1000)
@ -300,7 +300,7 @@ def boston_example():
mstu_t['t_noise'] = noise
print mstu_t
try:
mstu_t.optimize(messages=1)
mstu_t.optimize(optimizer=optimizer, messages=messages)
except Exception:
print "Blew up"
Y_test_pred = mstu_t.predict(X_test)
@ -316,10 +316,9 @@ def boston_example():
#Student t likelihood
deg_free = 3
print "Student-T GP {}df".format(deg_free)
kernelstu = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1])
t_distribution = GPy.likelihoods.noise_model_constructors.student_t(deg_free=deg_free, sigma2=noise)
stu_t_likelihood = GPy.likelihoods.Laplace(Y_train.copy(), t_distribution)
mstu_t = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelstu, likelihood=stu_t_likelihood)
mstu_t = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelstu.copy(), likelihood=stu_t_likelihood)
mstu_t.ensure_default_constraints()
mstu_t.constrain_fixed('white', 1e-5)
mstu_t.constrain_bounded('t_noise', 0.0001, 1000)
@ -327,7 +326,7 @@ def boston_example():
mstu_t['t_noise'] = noise
print mstu_t
try:
mstu_t.optimize(messages=1)
mstu_t.optimize(optimizer=optimizer, messages=messages)
except Exception:
print "Blew up"
Y_test_pred = mstu_t.predict(X_test)
@ -342,10 +341,9 @@ def boston_example():
deg_free = 5
print "Student-T GP {}df".format(deg_free)
kernelstu = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1])
t_distribution = GPy.likelihoods.noise_model_constructors.student_t(deg_free=deg_free, sigma2=noise)
stu_t_likelihood = GPy.likelihoods.Laplace(Y_train.copy(), t_distribution)
mstu_t = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelstu, likelihood=stu_t_likelihood)
mstu_t = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelstu.copy(), likelihood=stu_t_likelihood)
mstu_t.ensure_default_constraints()
mstu_t.constrain_fixed('white', 1e-5)
mstu_t.constrain_bounded('t_noise', 0.0001, 1000)
@ -353,7 +351,7 @@ def boston_example():
mstu_t['t_noise'] = noise
print mstu_t
try:
mstu_t.optimize(messages=1)
mstu_t.optimize(optimizer=optimizer, messages=messages)
except Exception:
print "Blew up"
Y_test_pred = mstu_t.predict(X_test)
@ -366,9 +364,10 @@ def boston_example():
plt.scatter(X_test[:, data_axis_plot], Y_test, c='r', marker='x')
plt.title('Stu t {}df'.format(deg_free))
score_folds[6, n] = rmse(Y_test, np.mean(Y_train))
print "Average scores: {}".format(np.mean(score_folds, 1))
import ipdb; ipdb.set_trace() # XXX BREAKPOINT
return score_folds

View file

@ -301,9 +301,9 @@ class Laplace(likelihood):
return -0.5*np.dot(Ki_f.T, f) + self.noise_model.logpdf(f, self.data, extra_data=self.extra_data)
difference = np.inf
epsilon = 1e-6
step_size = 1
rs = 0
epsilon = 1e-5
#step_size = 1
#rs = 0
i = 0
while difference > epsilon and i < MAX_ITER:
@ -330,7 +330,9 @@ class Laplace(likelihood):
i_o = partial_func(inner_obj, old_Ki_f=old_Ki_f, dKi_f=dKi_f, K=K)
#Find the stepsize that minimizes the objective function using a brent line search
new_obj = sp.optimize.minimize_scalar(i_o, method='brent', tol=1e-4, options={'maxiter':30}).fun
#The tolerance and maxiter matter for speed! Seems to be best to keep them low and make more full
#steps than get this exact then make a step, if B was bigger it might be the other way around though
new_obj = sp.optimize.minimize_scalar(i_o, method='brent', tol=1e-4, options={'maxiter':5}).fun
f = self.f.copy()
Ki_f = self.Ki_f.copy()

View file

@ -30,9 +30,9 @@ def dparam_checkgrad(func, dfunc, params, args, constraints=None, randomize=Fals
checkgrad expects a f: R^N -> R^1 and df: R^N -> R^N
However if we are holding other parameters fixed and moving something else
We need to check the gradient of each of the fixed parameters
(f and y for example) seperately.
Whilst moving another parameter. otherwise f: gives back R^N and
df: gives back R^NxM where M is
(f and y for example) seperately, whilst moving another parameter.
Otherwise f: gives back R^N and
df: gives back R^NxM where M is
The number of parameters and N is the number of data
Need to take a slice out from f and a slice out of df
"""
@ -48,6 +48,8 @@ def dparam_checkgrad(func, dfunc, params, args, constraints=None, randomize=Fals
#dlik and dlik_dvar gives back 1 value for each
f_ind = min(fnum, fixed_val+1) - 1
print "fnum: {} dfnum: {} f_ind: {} fixed_val: {}".format(fnum, dfnum, f_ind, fixed_val)
#Make grad checker with this param moving, note that set_params is NOT being called
#The parameter is being set directly with __setattr__
grad = GradientChecker(lambda x: np.atleast_1d(partial_f(x))[f_ind],
lambda x : np.atleast_1d(partial_df(x))[fixed_val],
param, 'p')
@ -57,8 +59,8 @@ def dparam_checkgrad(func, dfunc, params, args, constraints=None, randomize=Fals
constraint('p', grad)
if randomize:
grad.randomize()
print grad
if verbose:
print grad
grad.checkgrad(verbose=1)
if not grad.checkgrad():
gradchecking = False
@ -122,6 +124,7 @@ class TestNoiseModels(object):
"constrain": [constraint_wrappers, listed_here]
},
"laplace": boolean_of_whether_model_should_work_for_laplace,
"ep": boolean_of_whether_model_should_work_for_laplace,
"link_f_constraints": [constraint_wrappers, listed_here]
}
"""
@ -177,7 +180,8 @@ class TestNoiseModels(object):
"vals": [self.var],
"constraints": [constrain_positive]
},
"laplace": True
"laplace": True,
"ep": True
},
"Gaussian_log": {
"model": GPy.likelihoods.gaussian(gp_link=gp_transformations.Log(), variance=self.var, D=self.D, N=self.N),
@ -211,6 +215,7 @@ class TestNoiseModels(object):
"link_f_constraints": [partial(constrain_bounded, lower=0, upper=1)],
"laplace": True,
"Y": self.binary_Y,
"ep": True
}
}
@ -238,7 +243,14 @@ class TestNoiseModels(object):
f = attributes["f"].copy()
else:
f = self.f.copy()
laplace = attributes["laplace"]
if "laplace" in attributes:
laplace = attributes["laplace"]
else:
laplace = False
if "ep" in attributes:
ep = attributes["ep"]
else:
ep = False
if len(param_vals) > 1:
raise NotImplementedError("Cannot support multiple params in likelihood yet!")
@ -266,6 +278,10 @@ class TestNoiseModels(object):
#laplace likelihood gradcheck
yield self.t_laplace_fit_rbf_white, model, self.X, Y, f, self.step, param_vals, param_names, param_constraints
if ep:
#ep likelihood gradcheck
yield self.t_ep_fit_rbf_white, model, self.X, Y, f, self.step, param_vals, param_names, param_constraints
self.tearDown()
@ -321,7 +337,6 @@ class TestNoiseModels(object):
def t_dlogpdf_dparams(self, model, Y, f, params, param_constraints):
print "\n{}".format(inspect.stack()[0][3])
print model
print param_constraints
assert (
dparam_checkgrad(model.logpdf, model.dlogpdf_dtheta,
params, args=(f, Y), constraints=param_constraints,
@ -332,7 +347,6 @@ class TestNoiseModels(object):
def t_dlogpdf_df_dparams(self, model, Y, f, params, param_constraints):
print "\n{}".format(inspect.stack()[0][3])
print model
print param_constraints
assert (
dparam_checkgrad(model.dlogpdf_df, model.dlogpdf_df_dtheta,
params, args=(f, Y), constraints=param_constraints,
@ -343,7 +357,6 @@ class TestNoiseModels(object):
def t_d2logpdf2_df2_dparams(self, model, Y, f, params, param_constraints):
print "\n{}".format(inspect.stack()[0][3])
print model
#print param_constraints
assert (
dparam_checkgrad(model.d2logpdf_df2, model.d2logpdf_df2_dtheta,
params, args=(f, Y), constraints=param_constraints,
@ -459,6 +472,31 @@ class TestNoiseModels(object):
print m
assert m.checkgrad(step=step)
###########
# EP test #
###########
@with_setup(setUp, tearDown)
def t_ep_fit_rbf_white(self, model, X, Y, f, step, param_vals, param_names, constraints):
print "\n{}".format(inspect.stack()[0][3])
#Normalize
Y = Y/Y.max()
white_var = 0.001
kernel = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1])
ep_likelihood = GPy.likelihoods.EP(Y.copy(), model)
m = GPy.models.GPRegression(X.copy(), Y.copy(), kernel, likelihood=ep_likelihood)
m.ensure_default_constraints()
m.constrain_fixed('white', white_var)
for param_num in range(len(param_names)):
name = param_names[param_num]
m[name] = param_vals[param_num]
constraints[param_num](name, m)
m.randomize()
m.checkgrad(verbose=1, step=step)
print m
assert m.checkgrad(step=step)
class LaplaceTests(unittest.TestCase):
"""