Tidied up grad checking

2026-05-01 07:46:22 +02:00 · 2013-09-11 15:27:14 +01:00 · 2013-09-11 15:27:14 +01:00 · 42f8180c4e
commit 42f8180c4e
parent cf9ea23aef
4 changed files with 69 additions and 44 deletions
--- a/GPy/examples/laplace_approximations.py
+++ b/GPy/examples/laplace_approximations.py
@ -27,7 +27,7 @@ def timing():

        t_distribution = GPy.likelihoods.functions.StudentT(deg_free, sigma2=edited_real_sd)
        corrupt_stu_t_likelihood = GPy.likelihoods.Laplace(Yc.copy(), t_distribution, opt='rasm')
-        m = GPy.models.GPRegression(X, corrupt_stu_t_likelihood, kernel1)
+        m = GPy.models.GPRegression(X, Yc.copy(), kernel1, likelihood=corrupt_stu_t_likelihood)
        m.ensure_default_constraints()
        m.update_likelihood_approximation()
        m.optimize()
@ -56,7 +56,7 @@ def v_fail_test():
    print "Clean student t, rasm"
    t_distribution = GPy.likelihoods.functions.StudentT(deg_free, sigma2=edited_real_sd)
    stu_t_likelihood = GPy.likelihoods.Laplace(Y.copy(), t_distribution, opt='rasm')
-    m = GPy.models.GPRegression(X, stu_t_likelihood, kernel1)
+    m = GPy.models.GPRegression(X, Y.copy(), kernel1, likelihood=stu_t_likelihood)
    m.constrain_positive('')
    vs = 25
    noises = 30
@ -103,7 +103,7 @@ def student_t_obj_plane():
    kernelst = kernelgp.copy()
    t_distribution = GPy.likelihoods.functions.StudentT(deg_free, sigma2=(real_std**2))
    stu_t_likelihood = GPy.likelihoods.Laplace(Y.copy(), t_distribution, opt='rasm')
-    m = GPy.models.GPRegression(X, stu_t_likelihood, kernelst)
+    m = GPy.models.GPRegression(X, Y, kernelst, likelihood=stu_t_likelihood)
    m.ensure_default_constraints()
    m.constrain_fixed('t_no', real_std**2)
    vs = 10
@ -156,7 +156,7 @@ def student_t_f_check():
    #kernelst += GPy.kern.bias(X.shape[1])
    t_distribution = GPy.likelihoods.functions.StudentT(deg_free, sigma2=0.05)
    stu_t_likelihood = GPy.likelihoods.Laplace(Y.copy(), t_distribution, opt='rasm')
-    m = GPy.models.GPRegression(X, stu_t_likelihood, kernelst)
+    m = GPy.models.GPRegression(X, Y.copy(), kernelst, likelihood=stu_t_likelihood)
    #m['rbf_v'] = mgp._get_params()[0]
    #m['rbf_l'] = mgp._get_params()[1] + 1
    m.ensure_default_constraints()
@ -211,7 +211,7 @@ def student_t_fix_optimise_check():

    plt.figure(1)
    plt.suptitle('Student likelihood')
-    m = GPy.models.GPRegression(X, stu_t_likelihood, kernelst)
+    m = GPy.models.GPRegression(X, Y, kernelst, likelihood=stu_t_likelihood)
    m.constrain_fixed('rbf_var', mgp._get_params()[0])
    m.constrain_fixed('rbf_len', mgp._get_params()[1])
    m.constrain_positive('t_noise')
@ -352,7 +352,7 @@ def debug_student_t_noise_approx():
    t_distribution = GPy.likelihoods.functions.StudentT(deg_free, sigma2=edited_real_sd)
    stu_t_likelihood = GPy.likelihoods.Laplace(Y.copy(), t_distribution, opt='rasm')

-    m = GPy.models.GPRegression(X, stu_t_likelihood, kernel6)
+    m = GPy.models.GPRegression(X, Y, kernel6, likelihood=stu_t_likelihood)
    #m['rbf_len'] = 1.5
    #m.constrain_fixed('rbf_v', 1.0898)
    #m.constrain_fixed('rbf_l', 0.2651)
@ -482,7 +482,7 @@ def student_t_approx():
    print "Clean student t, rasm"
    t_distribution = GPy.likelihoods.functions.StudentT(deg_free, sigma2=edited_real_sd)
    stu_t_likelihood = GPy.likelihoods.Laplace(Y.copy(), t_distribution, opt='rasm')
-    m = GPy.models.GPRegression(X, Y.copy(), kernel6, stu_t_likelihood)
+    m = GPy.models.GPRegression(X, Y.copy(), kernel6, likelihood=stu_t_likelihood)
    m.ensure_default_constraints()
    m.constrain_positive('t_noise')
    m.randomize()
@ -498,7 +498,7 @@ def student_t_approx():
    print "Corrupt student t, rasm"
    t_distribution = GPy.likelihoods.functions.StudentT(deg_free, sigma2=edited_real_sd)
    corrupt_stu_t_likelihood = GPy.likelihoods.Laplace(Yc.copy(), t_distribution, opt='rasm')
-    m = GPy.models.GPRegression(X, Yc.copy(), kernel4, corrupt_stu_t_likelihood)
+    m = GPy.models.GPRegression(X, Yc.copy(), kernel4, likelihood=corrupt_stu_t_likelihood)
    m.ensure_default_constraints()
    m.constrain_positive('t_noise')
    m.randomize()
@ -516,7 +516,7 @@ def student_t_approx():
    #print "Clean student t, ncg"
    #t_distribution = GPy.likelihoods.functions.StudentT(deg_free, sigma2=edited_real_sd)
    #stu_t_likelihood = GPy.likelihoods.Laplace(Y, t_distribution, opt='ncg')
-    #m = GPy.models.GPRegression(X, stu_t_likelihood, kernel3)
+    #m = GPy.models.GPRegression(X, Y, kernel3, likelihood=stu_t_likelihood)
    #m.ensure_default_constraints()
    #m.update_likelihood_approximation()
    #m.optimize()
@ -530,7 +530,7 @@ def student_t_approx():
    #print "Corrupt student t, ncg"
    #t_distribution = GPy.likelihoods.functions.StudentT(deg_free, sigma2=edited_real_sd)
    #corrupt_stu_t_likelihood = GPy.likelihoods.Laplace(Yc.copy(), t_distribution, opt='ncg')
-    #m = GPy.models.GPRegression(X, corrupt_stu_t_likelihood, kernel5)
+    #m = GPy.models.GPRegression(X, Y, kernel5, likelihood=corrupt_stu_t_likelihood)
    #m.ensure_default_constraints()
    #m.update_likelihood_approximation()
    #m.optimize()
--- a/GPy/likelihoods/laplace.py
+++ b/GPy/likelihoods/laplace.py
@ -41,9 +41,12 @@ class Laplace(likelihood):
        self.N, self.D = self.data.shape
        self.is_heteroscedastic = True
        self.Nparams = 0
-
        self.NORMAL_CONST = ((0.5 * self.N) * np.log(2 * np.pi))

+        self.restart()
+
+
+    def restart(self):
        #Initial values for the GP variables
        self.Y = np.zeros((self.N, 1))
        self.covariance_matrix = np.eye(self.N)
@ -53,6 +56,7 @@ class Laplace(likelihood):

        self.old_a = None

+
    def predictive_values(self, mu, var, full_cov):
        if full_cov:
            raise NotImplementedError("Cannot make correlated predictions with an Laplace likelihood")
--- a/GPy/likelihoods/likelihood_functions.py
+++ b/GPy/likelihoods/likelihood_functions.py
@ -280,7 +280,7 @@ class StudentT(LikelihoodFunction):
                    )
        return d3lik_d3f

-    def lik_dstd(self, y, f, extra_data=None):
+    def dlik_dvar(self, y, f, extra_data=None):
        """
        Gradient of the likelihood (lik) w.r.t sigma parameter (standard deviation)

@ -291,10 +291,10 @@ class StudentT(LikelihoodFunction):
        """
        assert y.shape == f.shape
        e = y - f
-        dlik_dsigma = self.v*(e**2 - self.sigma2)/(2*self.sigma2*(self.sigma2*self.v + e**2))
-        return dlik_dsigma
+        dlik_dvar = self.v*(e**2 - self.sigma2)/(2*self.sigma2*(self.sigma2*self.v + e**2))
+        return dlik_dvar

-    def dlik_df_dstd(self, y, f, extra_data=None):
+    def dlik_df_dvar(self, y, f, extra_data=None):
        """
        Gradient of the dlik_df w.r.t sigma parameter (standard deviation)

@ -302,10 +302,10 @@ class StudentT(LikelihoodFunction):
        """
        assert y.shape == f.shape
        e = y - f
-        dlik_grad_dsigma = (self.v*(self.v+1)*(-e))/((self.sigma2*self.v + e**2)**2)
-        return dlik_grad_dsigma
+        dlik_grad_dvar = (self.v*(self.v+1)*(-e))/((self.sigma2*self.v + e**2)**2)
+        return dlik_grad_dvar

-    def d2lik_d2f_dstd(self, y, f, extra_data=None):
+    def d2lik_d2f_dvar(self, y, f, extra_data=None):
        """
        Gradient of the hessian (d2lik_d2f) w.r.t sigma parameter (standard deviation)

@ -313,16 +313,16 @@ class StudentT(LikelihoodFunction):
        """
        assert y.shape == f.shape
        e = y - f
-        dlik_hess_dsigma = ( (self.v*(self.v+1)*(self.sigma2*self.v - 3*(e**2)))
+        dlik_hess_dvar = ( (self.v*(self.v+1)*(self.sigma2*self.v - 3*(e**2)))
                              / ((self.sigma2*self.v + (e**2))**3)
                           )
-        return dlik_hess_dsigma
+        return dlik_hess_dvar

    def _gradients(self, y, f, extra_data=None):
        #must be listed in same order as 'get_param_names'
-        derivs = ([self.lik_dstd(y, f, extra_data=extra_data)],
-                  [self.dlik_df_dstd(y, f, extra_data=extra_data)],
-                  [self.d2lik_d2f_dstd(y, f, extra_data=extra_data)]
+        derivs = ([self.dlik_dvar(y, f, extra_data=extra_data)],
+                  [self.dlik_df_dvar(y, f, extra_data=extra_data)],
+                  [self.d2lik_d2f_dvar(y, f, extra_data=extra_data)]
                 ) # lists as we might learn many parameters
        # ensure we have gradients for every parameter we want to optimize
        assert len(derivs[0]) == len(self._get_param_names())
--- a/GPy/testing/laplace_tests.py
+++ b/GPy/testing/laplace_tests.py
@ -4,6 +4,24 @@ import GPy
 from GPy.models import GradientChecker
 import functools

+def dparam_partial(inst_func, *args):
+    """
+    If we have a instance method that needs to be called but that doesn't
+    take the parameter we wish to change to checkgrad, then this function
+    will change the variable using set params.
+
+    inst_func: should be a instance function of an object that we would like
+                to change
+    param: the param that will be given to set_params
+    args: anything else that needs to be given to the function (for example
+          the f or Y that are being used in the function whilst we tweak the
+          param
+    """
+    def param_func(param, inst_func, args):
+        inst_func.im_self._set_params(param)
+        return inst_func(*args)
+    return functools.partial(param_func, inst_func=inst_func, args=args)
+
 class LaplaceTests(unittest.TestCase):
    def setUp(self):
        self.N = 5
@ -24,6 +42,7 @@ class LaplaceTests(unittest.TestCase):
        grad = GradientChecker(link, dlik_df, self.f.copy(), 'f')
        grad.randomize()
        grad.checkgrad(verbose=1)
+        self.assertTrue(grad.checkgrad())

    def test_gaussian_d2lik_d2f(self):
        var = 0.1
@ -33,6 +52,7 @@ class LaplaceTests(unittest.TestCase):
        grad = GradientChecker(dlik_df, d2lik_d2f, self.f.copy(), 'f')
        grad.randomize()
        grad.checkgrad(verbose=1)
+        self.assertTrue(grad.checkgrad())

    def test_gaussian_d3lik_d3f(self):
        var = 0.1
@ -42,42 +62,43 @@ class LaplaceTests(unittest.TestCase):
        grad = GradientChecker(d2lik_d2f, d3lik_d3f, self.f.copy(), 'f')
        grad.randomize()
        grad.checkgrad(verbose=1)
+        self.assertTrue(grad.checkgrad())

    def test_gaussian_dlik_dvar(self):
        var = 0.1
        gauss = GPy.likelihoods.functions.Gaussian(var, self.D, self.N)
-        #Since the function we are checking does not directly accept the variable we wish to tweak
-        #We make function which makes the change (set params) then calls the function
-        def p_link_var(var, likelihood, f, Y):
-            likelihood._set_params(var)
-            return likelihood.link_function(f, Y)

-        def p_dlik_dvar(var, likelihood, f, Y):
-            likelihood._set_params(var)
-            return likelihood.dlik_dvar(f, Y)
-
-        link = functools.partial(p_link_var, likelihood=gauss, f=self.f, Y=self.Y)
-        dlik_dvar = functools.partial(p_dlik_dvar, likelihood=gauss, f=self.f, Y=self.Y)
+        link = dparam_partial(gauss.link_function, self.Y, self.f)
+        dlik_dvar = dparam_partial(gauss.dlik_dvar, self.Y, self.f)
        grad = GradientChecker(link, dlik_dvar, var, 'v')
+        grad.constrain_positive('v')
        grad.randomize()
        grad.checkgrad(verbose=1)
+        #self.assertTrue(grad.checkgrad())

    def test_gaussian_dlik_df_dvar(self):
        var = 0.1
        gauss = GPy.likelihoods.functions.Gaussian(var, self.D, self.N)
-        def p_dlik_df(var, likelihood, f, Y):
-            likelihood._set_params(var)
-            return likelihood.dlik_df(f, Y)

-        def p_dlik_df_dstd(var, likelihood, f, Y):
-            likelihood._set_params(var)
-            return likelihood.dlik_df_dvar(f, Y)
-
-        dlik_df = functools.partial(p_dlik_df, likelihood=gauss, f=self.f, Y=self.Y)
-        dlik_df_dstd = functools.partial(p_dlik_df_dstd, likelihood=gauss, f=self.f, Y=self.Y)
-        grad = GradientChecker(dlik_df, dlik_df_dstd, var, 'v')
+        dlik_df = dparam_partial(gauss.dlik_df, self.Y, self.f)
+        dlik_df_dvar = dparam_partial(gauss.dlik_df_dvar, self.Y, self.f)
+        grad = GradientChecker(dlik_df, dlik_df_dvar, var, 'v')
+        grad.constrain_positive('v')
        grad.randomize()
        grad.checkgrad(verbose=1)
+        #self.assertTrue(grad.checkgrad())
+
+    def test_studentt_dlik_dvar(self):
+        var = 0.1
+        stu_t = GPy.likelihoods.functions.StudentT(deg_free=5, sigma2=var)
+
+        link = dparam_partial(stu_t.link_function, self.Y, self.f)
+        dlik_dvar = dparam_partial(stu_t.dlik_dvar, self.Y, self.f)
+        grad = GradientChecker(link, dlik_dvar, var, 'v')
+        grad.constrain_positive('v')
+        grad.randomize()
+        grad.checkgrad(verbose=1)
+        #self.assertTrue(grad.checkgrad())

 if __name__ == "__main__":
    print "Running unit tests"