mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-06-11 15:15:15 +02:00
Updated boston tests (more folds, allow a bias as the datasets
are not normalized once split) and more folds. Tweaked some laplace line search parameters, added basis tests for ep
This commit is contained in:
parent
6678bca011
commit
3e0b597486
3 changed files with 75 additions and 36 deletions
|
|
@ -193,6 +193,8 @@ def gaussian_f_check():
|
|||
def boston_example():
|
||||
import sklearn
|
||||
from sklearn.cross_validation import KFold
|
||||
optimizer='bfgs'
|
||||
messages=0
|
||||
data = datasets.boston_housing()
|
||||
X = data['X'].copy()
|
||||
Y = data['Y'].copy()
|
||||
|
|
@ -200,9 +202,9 @@ def boston_example():
|
|||
X = X/X.std(axis=0)
|
||||
Y = Y-Y.mean()
|
||||
Y = Y/Y.std()
|
||||
num_folds = 10
|
||||
num_folds = 30
|
||||
kf = KFold(len(Y), n_folds=num_folds, indices=True)
|
||||
score_folds = np.zeros((6, num_folds))
|
||||
score_folds = np.zeros((7, num_folds))
|
||||
def rmse(Y, Ystar):
|
||||
return np.sqrt(np.mean((Y-Ystar)**2))
|
||||
for n, (train, test) in enumerate(kf):
|
||||
|
|
@ -212,18 +214,19 @@ def boston_example():
|
|||
noise = 1e-1 #np.exp(-2)
|
||||
rbf_len = 0.5
|
||||
data_axis_plot = 4
|
||||
plot = True
|
||||
plot = False
|
||||
kernelstu = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1]) + GPy.kern.bias(X.shape[1])
|
||||
kernelgp = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1]) + GPy.kern.bias(X.shape[1])
|
||||
|
||||
#Gaussian GP
|
||||
print "Gauss GP"
|
||||
kernelgp = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1])
|
||||
mgp = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelgp)
|
||||
mgp = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelgp.copy())
|
||||
mgp.ensure_default_constraints()
|
||||
mgp.constrain_fixed('white', 1e-5)
|
||||
mgp['rbf_len'] = rbf_len
|
||||
mgp['noise'] = noise
|
||||
print mgp
|
||||
mgp.optimize(messages=1)
|
||||
mgp.optimize(optimizer=optimizer,messages=messages)
|
||||
Y_test_pred = mgp.predict(X_test)
|
||||
score_folds[0, n] = rmse(Y_test, Y_test_pred[0])
|
||||
print mgp
|
||||
|
|
@ -235,11 +238,10 @@ def boston_example():
|
|||
plt.title('GP gauss')
|
||||
|
||||
print "Gaussian Laplace GP"
|
||||
kernelstu = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1])
|
||||
N, D = Y_train.shape
|
||||
g_distribution = GPy.likelihoods.noise_model_constructors.gaussian(variance=noise, N=N, D=D)
|
||||
g_likelihood = GPy.likelihoods.Laplace(Y_train.copy(), g_distribution)
|
||||
mg = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelstu, likelihood=g_likelihood)
|
||||
mg = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelstu.copy(), likelihood=g_likelihood)
|
||||
mg.ensure_default_constraints()
|
||||
mg.constrain_positive('noise_variance')
|
||||
mg.constrain_fixed('white', 1e-5)
|
||||
|
|
@ -247,7 +249,7 @@ def boston_example():
|
|||
mg['noise'] = noise
|
||||
print mg
|
||||
try:
|
||||
mg.optimize(messages=1)
|
||||
mg.optimize(optimizer=optimizer, messages=messages)
|
||||
except Exception:
|
||||
print "Blew up"
|
||||
Y_test_pred = mg.predict(X_test)
|
||||
|
|
@ -263,10 +265,9 @@ def boston_example():
|
|||
#Student T
|
||||
deg_free = 1
|
||||
print "Student-T GP {}df".format(deg_free)
|
||||
kernelstu = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1])
|
||||
t_distribution = GPy.likelihoods.noise_model_constructors.student_t(deg_free=deg_free, sigma2=noise)
|
||||
stu_t_likelihood = GPy.likelihoods.Laplace(Y_train.copy(), t_distribution)
|
||||
mstu_t = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelstu, likelihood=stu_t_likelihood)
|
||||
mstu_t = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelstu.copy(), likelihood=stu_t_likelihood)
|
||||
mstu_t.ensure_default_constraints()
|
||||
mstu_t.constrain_fixed('white', 1e-5)
|
||||
mstu_t.constrain_bounded('t_noise', 0.0001, 1000)
|
||||
|
|
@ -274,7 +275,7 @@ def boston_example():
|
|||
mstu_t['t_noise'] = noise
|
||||
print mstu_t
|
||||
try:
|
||||
mstu_t.optimize(messages=1)
|
||||
mstu_t.optimize(optimizer=optimizer, messages=messages)
|
||||
except Exception:
|
||||
print "Blew up"
|
||||
Y_test_pred = mstu_t.predict(X_test)
|
||||
|
|
@ -287,12 +288,11 @@ def boston_example():
|
|||
plt.scatter(X_test[:, data_axis_plot], Y_test, c='r', marker='x')
|
||||
plt.title('Stu t {}df'.format(deg_free))
|
||||
|
||||
deg_free = 2
|
||||
deg_free = 8
|
||||
print "Student-T GP {}df".format(deg_free)
|
||||
kernelstu = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1])
|
||||
t_distribution = GPy.likelihoods.noise_model_constructors.student_t(deg_free=deg_free, sigma2=noise)
|
||||
stu_t_likelihood = GPy.likelihoods.Laplace(Y_train.copy(), t_distribution)
|
||||
mstu_t = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelstu, likelihood=stu_t_likelihood)
|
||||
mstu_t = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelstu.copy(), likelihood=stu_t_likelihood)
|
||||
mstu_t.ensure_default_constraints()
|
||||
mstu_t.constrain_fixed('white', 1e-5)
|
||||
mstu_t.constrain_bounded('t_noise', 0.0001, 1000)
|
||||
|
|
@ -300,7 +300,7 @@ def boston_example():
|
|||
mstu_t['t_noise'] = noise
|
||||
print mstu_t
|
||||
try:
|
||||
mstu_t.optimize(messages=1)
|
||||
mstu_t.optimize(optimizer=optimizer, messages=messages)
|
||||
except Exception:
|
||||
print "Blew up"
|
||||
Y_test_pred = mstu_t.predict(X_test)
|
||||
|
|
@ -316,10 +316,9 @@ def boston_example():
|
|||
#Student t likelihood
|
||||
deg_free = 3
|
||||
print "Student-T GP {}df".format(deg_free)
|
||||
kernelstu = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1])
|
||||
t_distribution = GPy.likelihoods.noise_model_constructors.student_t(deg_free=deg_free, sigma2=noise)
|
||||
stu_t_likelihood = GPy.likelihoods.Laplace(Y_train.copy(), t_distribution)
|
||||
mstu_t = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelstu, likelihood=stu_t_likelihood)
|
||||
mstu_t = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelstu.copy(), likelihood=stu_t_likelihood)
|
||||
mstu_t.ensure_default_constraints()
|
||||
mstu_t.constrain_fixed('white', 1e-5)
|
||||
mstu_t.constrain_bounded('t_noise', 0.0001, 1000)
|
||||
|
|
@ -327,7 +326,7 @@ def boston_example():
|
|||
mstu_t['t_noise'] = noise
|
||||
print mstu_t
|
||||
try:
|
||||
mstu_t.optimize(messages=1)
|
||||
mstu_t.optimize(optimizer=optimizer, messages=messages)
|
||||
except Exception:
|
||||
print "Blew up"
|
||||
Y_test_pred = mstu_t.predict(X_test)
|
||||
|
|
@ -342,10 +341,9 @@ def boston_example():
|
|||
|
||||
deg_free = 5
|
||||
print "Student-T GP {}df".format(deg_free)
|
||||
kernelstu = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1])
|
||||
t_distribution = GPy.likelihoods.noise_model_constructors.student_t(deg_free=deg_free, sigma2=noise)
|
||||
stu_t_likelihood = GPy.likelihoods.Laplace(Y_train.copy(), t_distribution)
|
||||
mstu_t = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelstu, likelihood=stu_t_likelihood)
|
||||
mstu_t = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelstu.copy(), likelihood=stu_t_likelihood)
|
||||
mstu_t.ensure_default_constraints()
|
||||
mstu_t.constrain_fixed('white', 1e-5)
|
||||
mstu_t.constrain_bounded('t_noise', 0.0001, 1000)
|
||||
|
|
@ -353,7 +351,7 @@ def boston_example():
|
|||
mstu_t['t_noise'] = noise
|
||||
print mstu_t
|
||||
try:
|
||||
mstu_t.optimize(messages=1)
|
||||
mstu_t.optimize(optimizer=optimizer, messages=messages)
|
||||
except Exception:
|
||||
print "Blew up"
|
||||
Y_test_pred = mstu_t.predict(X_test)
|
||||
|
|
@ -366,9 +364,10 @@ def boston_example():
|
|||
plt.scatter(X_test[:, data_axis_plot], Y_test, c='r', marker='x')
|
||||
plt.title('Stu t {}df'.format(deg_free))
|
||||
|
||||
score_folds[6, n] = rmse(Y_test, np.mean(Y_train))
|
||||
|
||||
|
||||
|
||||
print "Average scores: {}".format(np.mean(score_folds, 1))
|
||||
import ipdb; ipdb.set_trace() # XXX BREAKPOINT
|
||||
return score_folds
|
||||
|
||||
|
|
|
|||
|
|
@ -301,9 +301,9 @@ class Laplace(likelihood):
|
|||
return -0.5*np.dot(Ki_f.T, f) + self.noise_model.logpdf(f, self.data, extra_data=self.extra_data)
|
||||
|
||||
difference = np.inf
|
||||
epsilon = 1e-6
|
||||
step_size = 1
|
||||
rs = 0
|
||||
epsilon = 1e-5
|
||||
#step_size = 1
|
||||
#rs = 0
|
||||
i = 0
|
||||
|
||||
while difference > epsilon and i < MAX_ITER:
|
||||
|
|
@ -330,7 +330,9 @@ class Laplace(likelihood):
|
|||
|
||||
i_o = partial_func(inner_obj, old_Ki_f=old_Ki_f, dKi_f=dKi_f, K=K)
|
||||
#Find the stepsize that minimizes the objective function using a brent line search
|
||||
new_obj = sp.optimize.minimize_scalar(i_o, method='brent', tol=1e-4, options={'maxiter':30}).fun
|
||||
#The tolerance and maxiter matter for speed! Seems to be best to keep them low and make more full
|
||||
#steps than get this exact then make a step, if B was bigger it might be the other way around though
|
||||
new_obj = sp.optimize.minimize_scalar(i_o, method='brent', tol=1e-4, options={'maxiter':5}).fun
|
||||
f = self.f.copy()
|
||||
Ki_f = self.Ki_f.copy()
|
||||
|
||||
|
|
|
|||
|
|
@ -30,9 +30,9 @@ def dparam_checkgrad(func, dfunc, params, args, constraints=None, randomize=Fals
|
|||
checkgrad expects a f: R^N -> R^1 and df: R^N -> R^N
|
||||
However if we are holding other parameters fixed and moving something else
|
||||
We need to check the gradient of each of the fixed parameters
|
||||
(f and y for example) seperately.
|
||||
Whilst moving another parameter. otherwise f: gives back R^N and
|
||||
df: gives back R^NxM where M is
|
||||
(f and y for example) seperately, whilst moving another parameter.
|
||||
Otherwise f: gives back R^N and
|
||||
df: gives back R^NxM where M is
|
||||
The number of parameters and N is the number of data
|
||||
Need to take a slice out from f and a slice out of df
|
||||
"""
|
||||
|
|
@ -48,6 +48,8 @@ def dparam_checkgrad(func, dfunc, params, args, constraints=None, randomize=Fals
|
|||
#dlik and dlik_dvar gives back 1 value for each
|
||||
f_ind = min(fnum, fixed_val+1) - 1
|
||||
print "fnum: {} dfnum: {} f_ind: {} fixed_val: {}".format(fnum, dfnum, f_ind, fixed_val)
|
||||
#Make grad checker with this param moving, note that set_params is NOT being called
|
||||
#The parameter is being set directly with __setattr__
|
||||
grad = GradientChecker(lambda x: np.atleast_1d(partial_f(x))[f_ind],
|
||||
lambda x : np.atleast_1d(partial_df(x))[fixed_val],
|
||||
param, 'p')
|
||||
|
|
@ -57,8 +59,8 @@ def dparam_checkgrad(func, dfunc, params, args, constraints=None, randomize=Fals
|
|||
constraint('p', grad)
|
||||
if randomize:
|
||||
grad.randomize()
|
||||
print grad
|
||||
if verbose:
|
||||
print grad
|
||||
grad.checkgrad(verbose=1)
|
||||
if not grad.checkgrad():
|
||||
gradchecking = False
|
||||
|
|
@ -122,6 +124,7 @@ class TestNoiseModels(object):
|
|||
"constrain": [constraint_wrappers, listed_here]
|
||||
},
|
||||
"laplace": boolean_of_whether_model_should_work_for_laplace,
|
||||
"ep": boolean_of_whether_model_should_work_for_laplace,
|
||||
"link_f_constraints": [constraint_wrappers, listed_here]
|
||||
}
|
||||
"""
|
||||
|
|
@ -177,7 +180,8 @@ class TestNoiseModels(object):
|
|||
"vals": [self.var],
|
||||
"constraints": [constrain_positive]
|
||||
},
|
||||
"laplace": True
|
||||
"laplace": True,
|
||||
"ep": True
|
||||
},
|
||||
"Gaussian_log": {
|
||||
"model": GPy.likelihoods.gaussian(gp_link=gp_transformations.Log(), variance=self.var, D=self.D, N=self.N),
|
||||
|
|
@ -211,6 +215,7 @@ class TestNoiseModels(object):
|
|||
"link_f_constraints": [partial(constrain_bounded, lower=0, upper=1)],
|
||||
"laplace": True,
|
||||
"Y": self.binary_Y,
|
||||
"ep": True
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -238,7 +243,14 @@ class TestNoiseModels(object):
|
|||
f = attributes["f"].copy()
|
||||
else:
|
||||
f = self.f.copy()
|
||||
laplace = attributes["laplace"]
|
||||
if "laplace" in attributes:
|
||||
laplace = attributes["laplace"]
|
||||
else:
|
||||
laplace = False
|
||||
if "ep" in attributes:
|
||||
ep = attributes["ep"]
|
||||
else:
|
||||
ep = False
|
||||
|
||||
if len(param_vals) > 1:
|
||||
raise NotImplementedError("Cannot support multiple params in likelihood yet!")
|
||||
|
|
@ -266,6 +278,10 @@ class TestNoiseModels(object):
|
|||
|
||||
#laplace likelihood gradcheck
|
||||
yield self.t_laplace_fit_rbf_white, model, self.X, Y, f, self.step, param_vals, param_names, param_constraints
|
||||
if ep:
|
||||
#ep likelihood gradcheck
|
||||
yield self.t_ep_fit_rbf_white, model, self.X, Y, f, self.step, param_vals, param_names, param_constraints
|
||||
|
||||
|
||||
self.tearDown()
|
||||
|
||||
|
|
@ -321,7 +337,6 @@ class TestNoiseModels(object):
|
|||
def t_dlogpdf_dparams(self, model, Y, f, params, param_constraints):
|
||||
print "\n{}".format(inspect.stack()[0][3])
|
||||
print model
|
||||
print param_constraints
|
||||
assert (
|
||||
dparam_checkgrad(model.logpdf, model.dlogpdf_dtheta,
|
||||
params, args=(f, Y), constraints=param_constraints,
|
||||
|
|
@ -332,7 +347,6 @@ class TestNoiseModels(object):
|
|||
def t_dlogpdf_df_dparams(self, model, Y, f, params, param_constraints):
|
||||
print "\n{}".format(inspect.stack()[0][3])
|
||||
print model
|
||||
print param_constraints
|
||||
assert (
|
||||
dparam_checkgrad(model.dlogpdf_df, model.dlogpdf_df_dtheta,
|
||||
params, args=(f, Y), constraints=param_constraints,
|
||||
|
|
@ -343,7 +357,6 @@ class TestNoiseModels(object):
|
|||
def t_d2logpdf2_df2_dparams(self, model, Y, f, params, param_constraints):
|
||||
print "\n{}".format(inspect.stack()[0][3])
|
||||
print model
|
||||
#print param_constraints
|
||||
assert (
|
||||
dparam_checkgrad(model.d2logpdf_df2, model.d2logpdf_df2_dtheta,
|
||||
params, args=(f, Y), constraints=param_constraints,
|
||||
|
|
@ -459,6 +472,31 @@ class TestNoiseModels(object):
|
|||
print m
|
||||
assert m.checkgrad(step=step)
|
||||
|
||||
###########
|
||||
# EP test #
|
||||
###########
|
||||
@with_setup(setUp, tearDown)
|
||||
def t_ep_fit_rbf_white(self, model, X, Y, f, step, param_vals, param_names, constraints):
|
||||
print "\n{}".format(inspect.stack()[0][3])
|
||||
#Normalize
|
||||
Y = Y/Y.max()
|
||||
white_var = 0.001
|
||||
kernel = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1])
|
||||
ep_likelihood = GPy.likelihoods.EP(Y.copy(), model)
|
||||
m = GPy.models.GPRegression(X.copy(), Y.copy(), kernel, likelihood=ep_likelihood)
|
||||
m.ensure_default_constraints()
|
||||
m.constrain_fixed('white', white_var)
|
||||
|
||||
for param_num in range(len(param_names)):
|
||||
name = param_names[param_num]
|
||||
m[name] = param_vals[param_num]
|
||||
constraints[param_num](name, m)
|
||||
|
||||
m.randomize()
|
||||
m.checkgrad(verbose=1, step=step)
|
||||
print m
|
||||
assert m.checkgrad(step=step)
|
||||
|
||||
|
||||
class LaplaceTests(unittest.TestCase):
|
||||
"""
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue