Updated boston tests (more folds, allow a bias as the datasets

are not normalized once split) and more folds. Tweaked some laplace line search parameters, added basis tests for ep
2026-06-11 15:15:15 +02:00 · 2013-10-23 14:39:33 +01:00 · 2013-10-23 14:39:33 +01:00 · 3e0b597486
commit 3e0b597486
parent 6678bca011
3 changed files with 75 additions and 36 deletions
--- a/GPy/examples/laplace_approximations.py
+++ b/GPy/examples/laplace_approximations.py
@ -193,6 +193,8 @@ def gaussian_f_check():
 def boston_example():
    import sklearn
    from sklearn.cross_validation import KFold
+    optimizer='bfgs'
+    messages=0
    data = datasets.boston_housing()
    X = data['X'].copy()
    Y = data['Y'].copy()
@ -200,9 +202,9 @@ def boston_example():
    X = X/X.std(axis=0)
    Y = Y-Y.mean()
    Y = Y/Y.std()
-    num_folds = 10
+    num_folds = 30
    kf = KFold(len(Y), n_folds=num_folds, indices=True)
-    score_folds = np.zeros((6, num_folds))
+    score_folds = np.zeros((7, num_folds))
    def rmse(Y, Ystar):
        return np.sqrt(np.mean((Y-Ystar)**2))
    for n, (train, test) in enumerate(kf):
@ -212,18 +214,19 @@ def boston_example():
        noise = 1e-1 #np.exp(-2)
        rbf_len = 0.5
        data_axis_plot = 4
-        plot = True
+        plot = False
+        kernelstu = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1]) + GPy.kern.bias(X.shape[1])
+        kernelgp = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1]) + GPy.kern.bias(X.shape[1])

        #Gaussian GP
        print "Gauss GP"
-        kernelgp = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1])
-        mgp = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelgp)
+        mgp = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelgp.copy())
        mgp.ensure_default_constraints()
        mgp.constrain_fixed('white', 1e-5)
        mgp['rbf_len'] = rbf_len
        mgp['noise'] = noise
        print mgp
-        mgp.optimize(messages=1)
+        mgp.optimize(optimizer=optimizer,messages=messages)
        Y_test_pred = mgp.predict(X_test)
        score_folds[0, n] = rmse(Y_test, Y_test_pred[0])
        print mgp
@ -235,11 +238,10 @@ def boston_example():
            plt.title('GP gauss')

        print "Gaussian Laplace GP"
-        kernelstu = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1])
        N, D = Y_train.shape
        g_distribution = GPy.likelihoods.noise_model_constructors.gaussian(variance=noise, N=N, D=D)
        g_likelihood = GPy.likelihoods.Laplace(Y_train.copy(), g_distribution)
-        mg = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelstu, likelihood=g_likelihood)
+        mg = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelstu.copy(), likelihood=g_likelihood)
        mg.ensure_default_constraints()
        mg.constrain_positive('noise_variance')
        mg.constrain_fixed('white', 1e-5)
@ -247,7 +249,7 @@ def boston_example():
        mg['noise'] = noise
        print mg
        try:
-            mg.optimize(messages=1)
+            mg.optimize(optimizer=optimizer, messages=messages)
        except Exception:
            print "Blew up"
        Y_test_pred = mg.predict(X_test)
@ -263,10 +265,9 @@ def boston_example():
        #Student T
        deg_free = 1
        print "Student-T GP {}df".format(deg_free)
-        kernelstu = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1])
        t_distribution = GPy.likelihoods.noise_model_constructors.student_t(deg_free=deg_free, sigma2=noise)
        stu_t_likelihood = GPy.likelihoods.Laplace(Y_train.copy(), t_distribution)
-        mstu_t = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelstu, likelihood=stu_t_likelihood)
+        mstu_t = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelstu.copy(), likelihood=stu_t_likelihood)
        mstu_t.ensure_default_constraints()
        mstu_t.constrain_fixed('white', 1e-5)
        mstu_t.constrain_bounded('t_noise', 0.0001, 1000)
@ -274,7 +275,7 @@ def boston_example():
        mstu_t['t_noise'] = noise
        print mstu_t
        try:
-            mstu_t.optimize(messages=1)
+            mstu_t.optimize(optimizer=optimizer, messages=messages)
        except Exception:
            print "Blew up"
        Y_test_pred = mstu_t.predict(X_test)
@ -287,12 +288,11 @@ def boston_example():
            plt.scatter(X_test[:, data_axis_plot], Y_test, c='r', marker='x')
            plt.title('Stu t {}df'.format(deg_free))

-        deg_free = 2
+        deg_free = 8
        print "Student-T GP {}df".format(deg_free)
-        kernelstu = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1])
        t_distribution = GPy.likelihoods.noise_model_constructors.student_t(deg_free=deg_free, sigma2=noise)
        stu_t_likelihood = GPy.likelihoods.Laplace(Y_train.copy(), t_distribution)
-        mstu_t = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelstu, likelihood=stu_t_likelihood)
+        mstu_t = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelstu.copy(), likelihood=stu_t_likelihood)
        mstu_t.ensure_default_constraints()
        mstu_t.constrain_fixed('white', 1e-5)
        mstu_t.constrain_bounded('t_noise', 0.0001, 1000)
@ -300,7 +300,7 @@ def boston_example():
        mstu_t['t_noise'] = noise
        print mstu_t
        try:
-            mstu_t.optimize(messages=1)
+            mstu_t.optimize(optimizer=optimizer, messages=messages)
        except Exception:
            print "Blew up"
        Y_test_pred = mstu_t.predict(X_test)
@ -316,10 +316,9 @@ def boston_example():
        #Student t likelihood
        deg_free = 3
        print "Student-T GP {}df".format(deg_free)
-        kernelstu = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1])
        t_distribution = GPy.likelihoods.noise_model_constructors.student_t(deg_free=deg_free, sigma2=noise)
        stu_t_likelihood = GPy.likelihoods.Laplace(Y_train.copy(), t_distribution)
-        mstu_t = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelstu, likelihood=stu_t_likelihood)
+        mstu_t = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelstu.copy(), likelihood=stu_t_likelihood)
        mstu_t.ensure_default_constraints()
        mstu_t.constrain_fixed('white', 1e-5)
        mstu_t.constrain_bounded('t_noise', 0.0001, 1000)
@ -327,7 +326,7 @@ def boston_example():
        mstu_t['t_noise'] = noise
        print mstu_t
        try:
-            mstu_t.optimize(messages=1)
+            mstu_t.optimize(optimizer=optimizer, messages=messages)
        except Exception:
            print "Blew up"
        Y_test_pred = mstu_t.predict(X_test)
@ -342,10 +341,9 @@ def boston_example():

        deg_free = 5
        print "Student-T GP {}df".format(deg_free)
-        kernelstu = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1])
        t_distribution = GPy.likelihoods.noise_model_constructors.student_t(deg_free=deg_free, sigma2=noise)
        stu_t_likelihood = GPy.likelihoods.Laplace(Y_train.copy(), t_distribution)
-        mstu_t = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelstu, likelihood=stu_t_likelihood)
+        mstu_t = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelstu.copy(), likelihood=stu_t_likelihood)
        mstu_t.ensure_default_constraints()
        mstu_t.constrain_fixed('white', 1e-5)
        mstu_t.constrain_bounded('t_noise', 0.0001, 1000)
@ -353,7 +351,7 @@ def boston_example():
        mstu_t['t_noise'] = noise
        print mstu_t
        try:
-            mstu_t.optimize(messages=1)
+            mstu_t.optimize(optimizer=optimizer, messages=messages)
        except Exception:
            print "Blew up"
        Y_test_pred = mstu_t.predict(X_test)
@ -366,9 +364,10 @@ def boston_example():
            plt.scatter(X_test[:, data_axis_plot], Y_test, c='r', marker='x')
            plt.title('Stu t {}df'.format(deg_free))

+        score_folds[6, n] = rmse(Y_test, np.mean(Y_train))


-
+    print "Average scores: {}".format(np.mean(score_folds, 1))
    import ipdb; ipdb.set_trace()  # XXX BREAKPOINT
    return score_folds

--- a/GPy/likelihoods/laplace.py
+++ b/GPy/likelihoods/laplace.py
@ -301,9 +301,9 @@ class Laplace(likelihood):
            return -0.5*np.dot(Ki_f.T, f) + self.noise_model.logpdf(f, self.data, extra_data=self.extra_data)

        difference = np.inf
-        epsilon = 1e-6
-        step_size = 1
-        rs = 0
+        epsilon = 1e-5
+        #step_size = 1
+        #rs = 0
        i = 0

        while difference > epsilon and i < MAX_ITER:
@ -330,7 +330,9 @@ class Laplace(likelihood):

            i_o = partial_func(inner_obj, old_Ki_f=old_Ki_f, dKi_f=dKi_f, K=K)
            #Find the stepsize that minimizes the objective function using a brent line search
-            new_obj = sp.optimize.minimize_scalar(i_o, method='brent', tol=1e-4, options={'maxiter':30}).fun
+            #The tolerance and maxiter matter for speed! Seems to be best to keep them low and make more full
+            #steps than get this exact then make a step, if B was bigger it might be the other way around though
+            new_obj = sp.optimize.minimize_scalar(i_o, method='brent', tol=1e-4, options={'maxiter':5}).fun
            f = self.f.copy()
            Ki_f = self.Ki_f.copy()

--- a/GPy/testing/likelihoods_tests.py
+++ b/GPy/testing/likelihoods_tests.py
@ -30,9 +30,9 @@ def dparam_checkgrad(func, dfunc, params, args, constraints=None, randomize=Fals
    checkgrad expects a f: R^N -> R^1 and df: R^N -> R^N
    However if we are holding other parameters fixed and moving something else
    We need to check the gradient of each of the fixed parameters
-    (f and y for example) seperately.
-    Whilst moving another parameter. otherwise f: gives back R^N and
-    df: gives back R^NxM where M is
+    (f and y for example) seperately,  whilst moving another parameter.
+    Otherwise f: gives back R^N and
+              df: gives back R^NxM where M is
    The number of parameters and N is the number of data
    Need to take a slice out from f and a slice out of df
    """
@ -48,6 +48,8 @@ def dparam_checkgrad(func, dfunc, params, args, constraints=None, randomize=Fals
            #dlik and dlik_dvar gives back 1 value for each
            f_ind = min(fnum, fixed_val+1) - 1
            print "fnum: {} dfnum: {} f_ind: {} fixed_val: {}".format(fnum, dfnum, f_ind, fixed_val)
+            #Make grad checker with this param moving, note that set_params is NOT being called
+            #The parameter is being set directly with __setattr__
            grad = GradientChecker(lambda x: np.atleast_1d(partial_f(x))[f_ind],
                                   lambda x : np.atleast_1d(partial_df(x))[fixed_val],
                                   param, 'p')
@ -57,8 +59,8 @@ def dparam_checkgrad(func, dfunc, params, args, constraints=None, randomize=Fals
                    constraint('p', grad)
            if randomize:
                grad.randomize()
-            print grad
            if verbose:
+                print grad
                grad.checkgrad(verbose=1)
            if not grad.checkgrad():
                gradchecking = False
@ -122,6 +124,7 @@ class TestNoiseModels(object):
                    "constrain": [constraint_wrappers, listed_here]
                    },
                "laplace": boolean_of_whether_model_should_work_for_laplace,
+                "ep": boolean_of_whether_model_should_work_for_laplace,
                "link_f_constraints": [constraint_wrappers, listed_here]
                }
        """
@ -177,7 +180,8 @@ class TestNoiseModels(object):
                                "vals": [self.var],
                                "constraints": [constrain_positive]
                                },
-                            "laplace": True
+                            "laplace": True,
+                            "ep": True
                            },
                        "Gaussian_log": {
                            "model": GPy.likelihoods.gaussian(gp_link=gp_transformations.Log(), variance=self.var, D=self.D, N=self.N),
@ -211,6 +215,7 @@ class TestNoiseModels(object):
                            "link_f_constraints": [partial(constrain_bounded, lower=0, upper=1)],
                            "laplace": True,
                            "Y": self.binary_Y,
+                            "ep": True
                        }
                    }

@ -238,7 +243,14 @@ class TestNoiseModels(object):
                f = attributes["f"].copy()
            else:
                f = self.f.copy()
-            laplace = attributes["laplace"]
+            if "laplace" in attributes:
+                laplace = attributes["laplace"]
+            else:
+                laplace = False
+            if "ep" in attributes:
+                ep = attributes["ep"]
+            else:
+                ep = False

            if len(param_vals) > 1:
                raise NotImplementedError("Cannot support multiple params in likelihood yet!")
@ -266,6 +278,10 @@ class TestNoiseModels(object):

                #laplace likelihood gradcheck
                yield self.t_laplace_fit_rbf_white, model, self.X, Y, f, self.step, param_vals, param_names, param_constraints
+            if ep:
+                #ep likelihood gradcheck
+                yield self.t_ep_fit_rbf_white, model, self.X, Y, f, self.step, param_vals, param_names, param_constraints
+

        self.tearDown()

@ -321,7 +337,6 @@ class TestNoiseModels(object):
    def t_dlogpdf_dparams(self, model, Y, f, params, param_constraints):
        print "\n{}".format(inspect.stack()[0][3])
        print model
-        print param_constraints
        assert (
                dparam_checkgrad(model.logpdf, model.dlogpdf_dtheta,
                    params, args=(f, Y), constraints=param_constraints,
@ -332,7 +347,6 @@ class TestNoiseModels(object):
    def t_dlogpdf_df_dparams(self, model, Y, f, params, param_constraints):
        print "\n{}".format(inspect.stack()[0][3])
        print model
-        print param_constraints
        assert (
                dparam_checkgrad(model.dlogpdf_df, model.dlogpdf_df_dtheta,
                    params, args=(f, Y), constraints=param_constraints,
@ -343,7 +357,6 @@ class TestNoiseModels(object):
    def t_d2logpdf2_df2_dparams(self, model, Y, f, params, param_constraints):
        print "\n{}".format(inspect.stack()[0][3])
        print model
-        #print param_constraints
        assert (
                dparam_checkgrad(model.d2logpdf_df2, model.d2logpdf_df2_dtheta,
                    params, args=(f, Y), constraints=param_constraints,
@ -459,6 +472,31 @@ class TestNoiseModels(object):
        print m
        assert m.checkgrad(step=step)

+    ###########
+    # EP test #
+    ###########
+    @with_setup(setUp, tearDown)
+    def t_ep_fit_rbf_white(self, model, X, Y, f, step, param_vals, param_names, constraints):
+        print "\n{}".format(inspect.stack()[0][3])
+        #Normalize
+        Y = Y/Y.max()
+        white_var = 0.001
+        kernel = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1])
+        ep_likelihood = GPy.likelihoods.EP(Y.copy(), model)
+        m = GPy.models.GPRegression(X.copy(), Y.copy(), kernel, likelihood=ep_likelihood)
+        m.ensure_default_constraints()
+        m.constrain_fixed('white', white_var)
+
+        for param_num in range(len(param_names)):
+            name = param_names[param_num]
+            m[name] = param_vals[param_num]
+            constraints[param_num](name, m)
+
+        m.randomize()
+        m.checkgrad(verbose=1, step=step)
+        print m
+        assert m.checkgrad(step=step)
+

 class LaplaceTests(unittest.TestCase):
    """