diff --git a/GPy/examples/laplace_approximations.py b/GPy/examples/laplace_approximations.py index ea3a9f8e..2f163583 100644 --- a/GPy/examples/laplace_approximations.py +++ b/GPy/examples/laplace_approximations.py @@ -193,6 +193,8 @@ def gaussian_f_check(): def boston_example(): import sklearn from sklearn.cross_validation import KFold + optimizer='bfgs' + messages=0 data = datasets.boston_housing() X = data['X'].copy() Y = data['Y'].copy() @@ -200,9 +202,9 @@ def boston_example(): X = X/X.std(axis=0) Y = Y-Y.mean() Y = Y/Y.std() - num_folds = 10 + num_folds = 30 kf = KFold(len(Y), n_folds=num_folds, indices=True) - score_folds = np.zeros((6, num_folds)) + score_folds = np.zeros((7, num_folds)) def rmse(Y, Ystar): return np.sqrt(np.mean((Y-Ystar)**2)) for n, (train, test) in enumerate(kf): @@ -212,18 +214,19 @@ def boston_example(): noise = 1e-1 #np.exp(-2) rbf_len = 0.5 data_axis_plot = 4 - plot = True + plot = False + kernelstu = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1]) + GPy.kern.bias(X.shape[1]) + kernelgp = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1]) + GPy.kern.bias(X.shape[1]) #Gaussian GP print "Gauss GP" - kernelgp = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1]) - mgp = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelgp) + mgp = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelgp.copy()) mgp.ensure_default_constraints() mgp.constrain_fixed('white', 1e-5) mgp['rbf_len'] = rbf_len mgp['noise'] = noise print mgp - mgp.optimize(messages=1) + mgp.optimize(optimizer=optimizer,messages=messages) Y_test_pred = mgp.predict(X_test) score_folds[0, n] = rmse(Y_test, Y_test_pred[0]) print mgp @@ -235,11 +238,10 @@ def boston_example(): plt.title('GP gauss') print "Gaussian Laplace GP" - kernelstu = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1]) N, D = Y_train.shape g_distribution = GPy.likelihoods.noise_model_constructors.gaussian(variance=noise, N=N, D=D) g_likelihood = GPy.likelihoods.Laplace(Y_train.copy(), g_distribution) - mg = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelstu, likelihood=g_likelihood) + mg = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelstu.copy(), likelihood=g_likelihood) mg.ensure_default_constraints() mg.constrain_positive('noise_variance') mg.constrain_fixed('white', 1e-5) @@ -247,7 +249,7 @@ def boston_example(): mg['noise'] = noise print mg try: - mg.optimize(messages=1) + mg.optimize(optimizer=optimizer, messages=messages) except Exception: print "Blew up" Y_test_pred = mg.predict(X_test) @@ -263,10 +265,9 @@ def boston_example(): #Student T deg_free = 1 print "Student-T GP {}df".format(deg_free) - kernelstu = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1]) t_distribution = GPy.likelihoods.noise_model_constructors.student_t(deg_free=deg_free, sigma2=noise) stu_t_likelihood = GPy.likelihoods.Laplace(Y_train.copy(), t_distribution) - mstu_t = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelstu, likelihood=stu_t_likelihood) + mstu_t = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelstu.copy(), likelihood=stu_t_likelihood) mstu_t.ensure_default_constraints() mstu_t.constrain_fixed('white', 1e-5) mstu_t.constrain_bounded('t_noise', 0.0001, 1000) @@ -274,7 +275,7 @@ def boston_example(): mstu_t['t_noise'] = noise print mstu_t try: - mstu_t.optimize(messages=1) + mstu_t.optimize(optimizer=optimizer, messages=messages) except Exception: print "Blew up" Y_test_pred = mstu_t.predict(X_test) @@ -287,12 +288,11 @@ def boston_example(): plt.scatter(X_test[:, data_axis_plot], Y_test, c='r', marker='x') plt.title('Stu t {}df'.format(deg_free)) - deg_free = 2 + deg_free = 8 print "Student-T GP {}df".format(deg_free) - kernelstu = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1]) t_distribution = GPy.likelihoods.noise_model_constructors.student_t(deg_free=deg_free, sigma2=noise) stu_t_likelihood = GPy.likelihoods.Laplace(Y_train.copy(), t_distribution) - mstu_t = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelstu, likelihood=stu_t_likelihood) + mstu_t = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelstu.copy(), likelihood=stu_t_likelihood) mstu_t.ensure_default_constraints() mstu_t.constrain_fixed('white', 1e-5) mstu_t.constrain_bounded('t_noise', 0.0001, 1000) @@ -300,7 +300,7 @@ def boston_example(): mstu_t['t_noise'] = noise print mstu_t try: - mstu_t.optimize(messages=1) + mstu_t.optimize(optimizer=optimizer, messages=messages) except Exception: print "Blew up" Y_test_pred = mstu_t.predict(X_test) @@ -316,10 +316,9 @@ def boston_example(): #Student t likelihood deg_free = 3 print "Student-T GP {}df".format(deg_free) - kernelstu = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1]) t_distribution = GPy.likelihoods.noise_model_constructors.student_t(deg_free=deg_free, sigma2=noise) stu_t_likelihood = GPy.likelihoods.Laplace(Y_train.copy(), t_distribution) - mstu_t = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelstu, likelihood=stu_t_likelihood) + mstu_t = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelstu.copy(), likelihood=stu_t_likelihood) mstu_t.ensure_default_constraints() mstu_t.constrain_fixed('white', 1e-5) mstu_t.constrain_bounded('t_noise', 0.0001, 1000) @@ -327,7 +326,7 @@ def boston_example(): mstu_t['t_noise'] = noise print mstu_t try: - mstu_t.optimize(messages=1) + mstu_t.optimize(optimizer=optimizer, messages=messages) except Exception: print "Blew up" Y_test_pred = mstu_t.predict(X_test) @@ -342,10 +341,9 @@ def boston_example(): deg_free = 5 print "Student-T GP {}df".format(deg_free) - kernelstu = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1]) t_distribution = GPy.likelihoods.noise_model_constructors.student_t(deg_free=deg_free, sigma2=noise) stu_t_likelihood = GPy.likelihoods.Laplace(Y_train.copy(), t_distribution) - mstu_t = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelstu, likelihood=stu_t_likelihood) + mstu_t = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelstu.copy(), likelihood=stu_t_likelihood) mstu_t.ensure_default_constraints() mstu_t.constrain_fixed('white', 1e-5) mstu_t.constrain_bounded('t_noise', 0.0001, 1000) @@ -353,7 +351,7 @@ def boston_example(): mstu_t['t_noise'] = noise print mstu_t try: - mstu_t.optimize(messages=1) + mstu_t.optimize(optimizer=optimizer, messages=messages) except Exception: print "Blew up" Y_test_pred = mstu_t.predict(X_test) @@ -366,9 +364,10 @@ def boston_example(): plt.scatter(X_test[:, data_axis_plot], Y_test, c='r', marker='x') plt.title('Stu t {}df'.format(deg_free)) + score_folds[6, n] = rmse(Y_test, np.mean(Y_train)) - + print "Average scores: {}".format(np.mean(score_folds, 1)) import ipdb; ipdb.set_trace() # XXX BREAKPOINT return score_folds diff --git a/GPy/likelihoods/laplace.py b/GPy/likelihoods/laplace.py index e6ffd78c..05b4ff02 100644 --- a/GPy/likelihoods/laplace.py +++ b/GPy/likelihoods/laplace.py @@ -301,9 +301,9 @@ class Laplace(likelihood): return -0.5*np.dot(Ki_f.T, f) + self.noise_model.logpdf(f, self.data, extra_data=self.extra_data) difference = np.inf - epsilon = 1e-6 - step_size = 1 - rs = 0 + epsilon = 1e-5 + #step_size = 1 + #rs = 0 i = 0 while difference > epsilon and i < MAX_ITER: @@ -330,7 +330,9 @@ class Laplace(likelihood): i_o = partial_func(inner_obj, old_Ki_f=old_Ki_f, dKi_f=dKi_f, K=K) #Find the stepsize that minimizes the objective function using a brent line search - new_obj = sp.optimize.minimize_scalar(i_o, method='brent', tol=1e-4, options={'maxiter':30}).fun + #The tolerance and maxiter matter for speed! Seems to be best to keep them low and make more full + #steps than get this exact then make a step, if B was bigger it might be the other way around though + new_obj = sp.optimize.minimize_scalar(i_o, method='brent', tol=1e-4, options={'maxiter':5}).fun f = self.f.copy() Ki_f = self.Ki_f.copy() diff --git a/GPy/testing/likelihoods_tests.py b/GPy/testing/likelihoods_tests.py index 9a3dfd16..fff5dcac 100644 --- a/GPy/testing/likelihoods_tests.py +++ b/GPy/testing/likelihoods_tests.py @@ -30,9 +30,9 @@ def dparam_checkgrad(func, dfunc, params, args, constraints=None, randomize=Fals checkgrad expects a f: R^N -> R^1 and df: R^N -> R^N However if we are holding other parameters fixed and moving something else We need to check the gradient of each of the fixed parameters - (f and y for example) seperately. - Whilst moving another parameter. otherwise f: gives back R^N and - df: gives back R^NxM where M is + (f and y for example) seperately, whilst moving another parameter. + Otherwise f: gives back R^N and + df: gives back R^NxM where M is The number of parameters and N is the number of data Need to take a slice out from f and a slice out of df """ @@ -48,6 +48,8 @@ def dparam_checkgrad(func, dfunc, params, args, constraints=None, randomize=Fals #dlik and dlik_dvar gives back 1 value for each f_ind = min(fnum, fixed_val+1) - 1 print "fnum: {} dfnum: {} f_ind: {} fixed_val: {}".format(fnum, dfnum, f_ind, fixed_val) + #Make grad checker with this param moving, note that set_params is NOT being called + #The parameter is being set directly with __setattr__ grad = GradientChecker(lambda x: np.atleast_1d(partial_f(x))[f_ind], lambda x : np.atleast_1d(partial_df(x))[fixed_val], param, 'p') @@ -57,8 +59,8 @@ def dparam_checkgrad(func, dfunc, params, args, constraints=None, randomize=Fals constraint('p', grad) if randomize: grad.randomize() - print grad if verbose: + print grad grad.checkgrad(verbose=1) if not grad.checkgrad(): gradchecking = False @@ -122,6 +124,7 @@ class TestNoiseModels(object): "constrain": [constraint_wrappers, listed_here] }, "laplace": boolean_of_whether_model_should_work_for_laplace, + "ep": boolean_of_whether_model_should_work_for_laplace, "link_f_constraints": [constraint_wrappers, listed_here] } """ @@ -177,7 +180,8 @@ class TestNoiseModels(object): "vals": [self.var], "constraints": [constrain_positive] }, - "laplace": True + "laplace": True, + "ep": True }, "Gaussian_log": { "model": GPy.likelihoods.gaussian(gp_link=gp_transformations.Log(), variance=self.var, D=self.D, N=self.N), @@ -211,6 +215,7 @@ class TestNoiseModels(object): "link_f_constraints": [partial(constrain_bounded, lower=0, upper=1)], "laplace": True, "Y": self.binary_Y, + "ep": True } } @@ -238,7 +243,14 @@ class TestNoiseModels(object): f = attributes["f"].copy() else: f = self.f.copy() - laplace = attributes["laplace"] + if "laplace" in attributes: + laplace = attributes["laplace"] + else: + laplace = False + if "ep" in attributes: + ep = attributes["ep"] + else: + ep = False if len(param_vals) > 1: raise NotImplementedError("Cannot support multiple params in likelihood yet!") @@ -266,6 +278,10 @@ class TestNoiseModels(object): #laplace likelihood gradcheck yield self.t_laplace_fit_rbf_white, model, self.X, Y, f, self.step, param_vals, param_names, param_constraints + if ep: + #ep likelihood gradcheck + yield self.t_ep_fit_rbf_white, model, self.X, Y, f, self.step, param_vals, param_names, param_constraints + self.tearDown() @@ -321,7 +337,6 @@ class TestNoiseModels(object): def t_dlogpdf_dparams(self, model, Y, f, params, param_constraints): print "\n{}".format(inspect.stack()[0][3]) print model - print param_constraints assert ( dparam_checkgrad(model.logpdf, model.dlogpdf_dtheta, params, args=(f, Y), constraints=param_constraints, @@ -332,7 +347,6 @@ class TestNoiseModels(object): def t_dlogpdf_df_dparams(self, model, Y, f, params, param_constraints): print "\n{}".format(inspect.stack()[0][3]) print model - print param_constraints assert ( dparam_checkgrad(model.dlogpdf_df, model.dlogpdf_df_dtheta, params, args=(f, Y), constraints=param_constraints, @@ -343,7 +357,6 @@ class TestNoiseModels(object): def t_d2logpdf2_df2_dparams(self, model, Y, f, params, param_constraints): print "\n{}".format(inspect.stack()[0][3]) print model - #print param_constraints assert ( dparam_checkgrad(model.d2logpdf_df2, model.d2logpdf_df2_dtheta, params, args=(f, Y), constraints=param_constraints, @@ -459,6 +472,31 @@ class TestNoiseModels(object): print m assert m.checkgrad(step=step) + ########### + # EP test # + ########### + @with_setup(setUp, tearDown) + def t_ep_fit_rbf_white(self, model, X, Y, f, step, param_vals, param_names, constraints): + print "\n{}".format(inspect.stack()[0][3]) + #Normalize + Y = Y/Y.max() + white_var = 0.001 + kernel = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1]) + ep_likelihood = GPy.likelihoods.EP(Y.copy(), model) + m = GPy.models.GPRegression(X.copy(), Y.copy(), kernel, likelihood=ep_likelihood) + m.ensure_default_constraints() + m.constrain_fixed('white', white_var) + + for param_num in range(len(param_names)): + name = param_names[param_num] + m[name] = param_vals[param_num] + constraints[param_num](name, m) + + m.randomize() + m.checkgrad(verbose=1, step=step) + print m + assert m.checkgrad(step=step) + class LaplaceTests(unittest.TestCase): """