From 42f8180c4e52d62dc1013bfc4834e0c5faf43ee8 Mon Sep 17 00:00:00 2001 From: Alan Saul Date: Wed, 11 Sep 2013 15:27:14 +0100 Subject: [PATCH] Tidied up grad checking --- GPy/examples/laplace_approximations.py | 20 ++++---- GPy/likelihoods/laplace.py | 6 ++- GPy/likelihoods/likelihood_functions.py | 24 +++++----- GPy/testing/laplace_tests.py | 63 ++++++++++++++++--------- 4 files changed, 69 insertions(+), 44 deletions(-) diff --git a/GPy/examples/laplace_approximations.py b/GPy/examples/laplace_approximations.py index c0bc3aef..50e1858b 100644 --- a/GPy/examples/laplace_approximations.py +++ b/GPy/examples/laplace_approximations.py @@ -27,7 +27,7 @@ def timing(): t_distribution = GPy.likelihoods.functions.StudentT(deg_free, sigma2=edited_real_sd) corrupt_stu_t_likelihood = GPy.likelihoods.Laplace(Yc.copy(), t_distribution, opt='rasm') - m = GPy.models.GPRegression(X, corrupt_stu_t_likelihood, kernel1) + m = GPy.models.GPRegression(X, Yc.copy(), kernel1, likelihood=corrupt_stu_t_likelihood) m.ensure_default_constraints() m.update_likelihood_approximation() m.optimize() @@ -56,7 +56,7 @@ def v_fail_test(): print "Clean student t, rasm" t_distribution = GPy.likelihoods.functions.StudentT(deg_free, sigma2=edited_real_sd) stu_t_likelihood = GPy.likelihoods.Laplace(Y.copy(), t_distribution, opt='rasm') - m = GPy.models.GPRegression(X, stu_t_likelihood, kernel1) + m = GPy.models.GPRegression(X, Y.copy(), kernel1, likelihood=stu_t_likelihood) m.constrain_positive('') vs = 25 noises = 30 @@ -103,7 +103,7 @@ def student_t_obj_plane(): kernelst = kernelgp.copy() t_distribution = GPy.likelihoods.functions.StudentT(deg_free, sigma2=(real_std**2)) stu_t_likelihood = GPy.likelihoods.Laplace(Y.copy(), t_distribution, opt='rasm') - m = GPy.models.GPRegression(X, stu_t_likelihood, kernelst) + m = GPy.models.GPRegression(X, Y, kernelst, likelihood=stu_t_likelihood) m.ensure_default_constraints() m.constrain_fixed('t_no', real_std**2) vs = 10 @@ -156,7 +156,7 @@ def 
student_t_f_check(): #kernelst += GPy.kern.bias(X.shape[1]) t_distribution = GPy.likelihoods.functions.StudentT(deg_free, sigma2=0.05) stu_t_likelihood = GPy.likelihoods.Laplace(Y.copy(), t_distribution, opt='rasm') - m = GPy.models.GPRegression(X, stu_t_likelihood, kernelst) + m = GPy.models.GPRegression(X, Y.copy(), kernelst, likelihood=stu_t_likelihood) #m['rbf_v'] = mgp._get_params()[0] #m['rbf_l'] = mgp._get_params()[1] + 1 m.ensure_default_constraints() @@ -211,7 +211,7 @@ def student_t_fix_optimise_check(): plt.figure(1) plt.suptitle('Student likelihood') - m = GPy.models.GPRegression(X, stu_t_likelihood, kernelst) + m = GPy.models.GPRegression(X, Y, kernelst, likelihood=stu_t_likelihood) m.constrain_fixed('rbf_var', mgp._get_params()[0]) m.constrain_fixed('rbf_len', mgp._get_params()[1]) m.constrain_positive('t_noise') @@ -352,7 +352,7 @@ def debug_student_t_noise_approx(): t_distribution = GPy.likelihoods.functions.StudentT(deg_free, sigma2=edited_real_sd) stu_t_likelihood = GPy.likelihoods.Laplace(Y.copy(), t_distribution, opt='rasm') - m = GPy.models.GPRegression(X, stu_t_likelihood, kernel6) + m = GPy.models.GPRegression(X, Y, kernel6, likelihood=stu_t_likelihood) #m['rbf_len'] = 1.5 #m.constrain_fixed('rbf_v', 1.0898) #m.constrain_fixed('rbf_l', 0.2651) @@ -482,7 +482,7 @@ def student_t_approx(): print "Clean student t, rasm" t_distribution = GPy.likelihoods.functions.StudentT(deg_free, sigma2=edited_real_sd) stu_t_likelihood = GPy.likelihoods.Laplace(Y.copy(), t_distribution, opt='rasm') - m = GPy.models.GPRegression(X, Y.copy(), kernel6, stu_t_likelihood) + m = GPy.models.GPRegression(X, Y.copy(), kernel6, likelihood=stu_t_likelihood) m.ensure_default_constraints() m.constrain_positive('t_noise') m.randomize() @@ -498,7 +498,7 @@ def student_t_approx(): print "Corrupt student t, rasm" t_distribution = GPy.likelihoods.functions.StudentT(deg_free, sigma2=edited_real_sd) corrupt_stu_t_likelihood = GPy.likelihoods.Laplace(Yc.copy(), t_distribution, 
opt='rasm') - m = GPy.models.GPRegression(X, Yc.copy(), kernel4, corrupt_stu_t_likelihood) + m = GPy.models.GPRegression(X, Yc.copy(), kernel4, likelihood=corrupt_stu_t_likelihood) m.ensure_default_constraints() m.constrain_positive('t_noise') m.randomize() @@ -516,7 +516,7 @@ def student_t_approx(): #print "Clean student t, ncg" #t_distribution = GPy.likelihoods.functions.StudentT(deg_free, sigma2=edited_real_sd) #stu_t_likelihood = GPy.likelihoods.Laplace(Y, t_distribution, opt='ncg') - #m = GPy.models.GPRegression(X, stu_t_likelihood, kernel3) + #m = GPy.models.GPRegression(X, Y, kernel3, likelihood=stu_t_likelihood) #m.ensure_default_constraints() #m.update_likelihood_approximation() #m.optimize() @@ -530,7 +530,7 @@ def student_t_approx(): #print "Corrupt student t, ncg" #t_distribution = GPy.likelihoods.functions.StudentT(deg_free, sigma2=edited_real_sd) #corrupt_stu_t_likelihood = GPy.likelihoods.Laplace(Yc.copy(), t_distribution, opt='ncg') - #m = GPy.models.GPRegression(X, corrupt_stu_t_likelihood, kernel5) + #m = GPy.models.GPRegression(X, Y, kernel5, likelihood=corrupt_stu_t_likelihood) #m.ensure_default_constraints() #m.update_likelihood_approximation() #m.optimize() diff --git a/GPy/likelihoods/laplace.py b/GPy/likelihoods/laplace.py index b5b16521..2f98b2ff 100644 --- a/GPy/likelihoods/laplace.py +++ b/GPy/likelihoods/laplace.py @@ -41,9 +41,12 @@ class Laplace(likelihood): self.N, self.D = self.data.shape self.is_heteroscedastic = True self.Nparams = 0 - self.NORMAL_CONST = ((0.5 * self.N) * np.log(2 * np.pi)) + self.restart() + + + def restart(self): #Initial values for the GP variables self.Y = np.zeros((self.N, 1)) self.covariance_matrix = np.eye(self.N) @@ -53,6 +56,7 @@ class Laplace(likelihood): self.old_a = None + def predictive_values(self, mu, var, full_cov): if full_cov: raise NotImplementedError("Cannot make correlated predictions with an Laplace likelihood") diff --git a/GPy/likelihoods/likelihood_functions.py 
b/GPy/likelihoods/likelihood_functions.py index 9d4dc041..330116de 100644 --- a/GPy/likelihoods/likelihood_functions.py +++ b/GPy/likelihoods/likelihood_functions.py @@ -280,7 +280,7 @@ class StudentT(LikelihoodFunction): ) return d3lik_d3f - def lik_dstd(self, y, f, extra_data=None): + def dlik_dvar(self, y, f, extra_data=None): """ Gradient of the likelihood (lik) w.r.t sigma parameter (standard deviation) @@ -291,10 +291,10 @@ class StudentT(LikelihoodFunction): """ assert y.shape == f.shape e = y - f - dlik_dsigma = self.v*(e**2 - self.sigma2)/(2*self.sigma2*(self.sigma2*self.v + e**2)) - return dlik_dsigma + dlik_dvar = self.v*(e**2 - self.sigma2)/(2*self.sigma2*(self.sigma2*self.v + e**2)) + return dlik_dvar - def dlik_df_dstd(self, y, f, extra_data=None): + def dlik_df_dvar(self, y, f, extra_data=None): """ Gradient of the dlik_df w.r.t sigma parameter (standard deviation) @@ -302,10 +302,10 @@ class StudentT(LikelihoodFunction): """ assert y.shape == f.shape e = y - f - dlik_grad_dsigma = (self.v*(self.v+1)*(-e))/((self.sigma2*self.v + e**2)**2) - return dlik_grad_dsigma + dlik_grad_dvar = (self.v*(self.v+1)*(-e))/((self.sigma2*self.v + e**2)**2) + return dlik_grad_dvar - def d2lik_d2f_dstd(self, y, f, extra_data=None): + def d2lik_d2f_dvar(self, y, f, extra_data=None): """ Gradient of the hessian (d2lik_d2f) w.r.t sigma parameter (standard deviation) @@ -313,16 +313,16 @@ class StudentT(LikelihoodFunction): """ assert y.shape == f.shape e = y - f - dlik_hess_dsigma = ( (self.v*(self.v+1)*(self.sigma2*self.v - 3*(e**2))) + dlik_hess_dvar = ( (self.v*(self.v+1)*(self.sigma2*self.v - 3*(e**2))) / ((self.sigma2*self.v + (e**2))**3) ) - return dlik_hess_dsigma + return dlik_hess_dvar def _gradients(self, y, f, extra_data=None): #must be listed in same order as 'get_param_names' - derivs = ([self.lik_dstd(y, f, extra_data=extra_data)], - [self.dlik_df_dstd(y, f, extra_data=extra_data)], - [self.d2lik_d2f_dstd(y, f, extra_data=extra_data)] + derivs = 
([self.dlik_dvar(y, f, extra_data=extra_data)], + [self.dlik_df_dvar(y, f, extra_data=extra_data)], + [self.d2lik_d2f_dvar(y, f, extra_data=extra_data)] ) # lists as we might learn many parameters # ensure we have gradients for every parameter we want to optimize assert len(derivs[0]) == len(self._get_param_names()) diff --git a/GPy/testing/laplace_tests.py b/GPy/testing/laplace_tests.py index 351cfcbb..8aabe50a 100644 --- a/GPy/testing/laplace_tests.py +++ b/GPy/testing/laplace_tests.py @@ -4,6 +4,24 @@ import GPy from GPy.models import GradientChecker import functools +def dparam_partial(inst_func, *args): + """ + If we have an instance method that needs to be called but that doesn't + take the parameter we wish to change to checkgrad, then this function + will change the variable using set params. + + inst_func: should be an instance function of an object that we would like + to change + param: the param that will be given to set_params + args: anything else that needs to be given to the function (for example + the f or Y that are being used in the function whilst we tweak the + param) + """ + def param_func(param, inst_func, args): + inst_func.im_self._set_params(param) + return inst_func(*args) + return functools.partial(param_func, inst_func=inst_func, args=args) + class LaplaceTests(unittest.TestCase): def setUp(self): self.N = 5 @@ -24,6 +42,7 @@ class LaplaceTests(unittest.TestCase): grad = GradientChecker(link, dlik_df, self.f.copy(), 'f') grad.randomize() grad.checkgrad(verbose=1) + self.assertTrue(grad.checkgrad()) def test_gaussian_d2lik_d2f(self): var = 0.1 @@ -33,6 +52,7 @@ class LaplaceTests(unittest.TestCase): grad = GradientChecker(dlik_df, d2lik_d2f, self.f.copy(), 'f') grad.randomize() grad.checkgrad(verbose=1) + self.assertTrue(grad.checkgrad()) def test_gaussian_d3lik_d3f(self): var = 0.1 @@ -42,42 +62,43 @@ class LaplaceTests(unittest.TestCase): grad = GradientChecker(d2lik_d2f, d3lik_d3f, self.f.copy(), 'f') grad.randomize()
grad.checkgrad(verbose=1) + self.assertTrue(grad.checkgrad()) def test_gaussian_dlik_dvar(self): var = 0.1 gauss = GPy.likelihoods.functions.Gaussian(var, self.D, self.N) - #Since the function we are checking does not directly accept the variable we wish to tweak - #We make function which makes the change (set params) then calls the function - def p_link_var(var, likelihood, f, Y): - likelihood._set_params(var) - return likelihood.link_function(f, Y) - def p_dlik_dvar(var, likelihood, f, Y): - likelihood._set_params(var) - return likelihood.dlik_dvar(f, Y) - - link = functools.partial(p_link_var, likelihood=gauss, f=self.f, Y=self.Y) - dlik_dvar = functools.partial(p_dlik_dvar, likelihood=gauss, f=self.f, Y=self.Y) + link = dparam_partial(gauss.link_function, self.Y, self.f) + dlik_dvar = dparam_partial(gauss.dlik_dvar, self.Y, self.f) grad = GradientChecker(link, dlik_dvar, var, 'v') + grad.constrain_positive('v') grad.randomize() grad.checkgrad(verbose=1) + #self.assertTrue(grad.checkgrad()) def test_gaussian_dlik_df_dvar(self): var = 0.1 gauss = GPy.likelihoods.functions.Gaussian(var, self.D, self.N) - def p_dlik_df(var, likelihood, f, Y): - likelihood._set_params(var) - return likelihood.dlik_df(f, Y) - def p_dlik_df_dstd(var, likelihood, f, Y): - likelihood._set_params(var) - return likelihood.dlik_df_dvar(f, Y) - - dlik_df = functools.partial(p_dlik_df, likelihood=gauss, f=self.f, Y=self.Y) - dlik_df_dstd = functools.partial(p_dlik_df_dstd, likelihood=gauss, f=self.f, Y=self.Y) - grad = GradientChecker(dlik_df, dlik_df_dstd, var, 'v') + dlik_df = dparam_partial(gauss.dlik_df, self.Y, self.f) + dlik_df_dvar = dparam_partial(gauss.dlik_df_dvar, self.Y, self.f) + grad = GradientChecker(dlik_df, dlik_df_dvar, var, 'v') + grad.constrain_positive('v') grad.randomize() grad.checkgrad(verbose=1) + #self.assertTrue(grad.checkgrad()) + + def test_studentt_dlik_dvar(self): + var = 0.1 + stu_t = GPy.likelihoods.functions.StudentT(deg_free=5, sigma2=var) + + link = 
dparam_partial(stu_t.link_function, self.Y, self.f) + dlik_dvar = dparam_partial(stu_t.dlik_dvar, self.Y, self.f) + grad = GradientChecker(link, dlik_dvar, var, 'v') + grad.constrain_positive('v') + grad.randomize() + grad.checkgrad(verbose=1) + #self.assertTrue(grad.checkgrad()) if __name__ == "__main__": print "Running unit tests"