From 03443245713db87edf475aba2718990e8cda373e Mon Sep 17 00:00:00 2001
From: Alan Saul <alan.daniel.saul@gmail.com>
Date: Tue, 15 Oct 2013 18:58:41 +0100
Subject: [PATCH] Still tidying up, laplace now working again, gaussian and
 student_t likelihoods now done

---
 GPy/likelihoods/laplace.py                    | 10 +--
 .../noise_models/gaussian_noise.py            | 30 +++----
 .../noise_models/noise_distributions.py       | 86 +++++++++++++++++++
 .../noise_models/student_t_noise.py           | 47 +++-------
 GPy/testing/laplace_tests.py                  | 48 +++++------
 GPy/util/misc.py                              | 27 ++++++
 6 files changed, 167 insertions(+), 81 deletions(-)

diff --git a/GPy/likelihoods/laplace.py b/GPy/likelihoods/laplace.py
index f4233554..8019e430 100644
--- a/GPy/likelihoods/laplace.py
+++ b/GPy/likelihoods/laplace.py
@@ -89,7 +89,7 @@ class Laplace(likelihood):
         :rtype: Matrix (1 x num_kernel_params)
         """
         dL_dfhat, I_KW_i = self._shared_gradients_components()
-        dlp = self.noise_model.dlogpdf_df(self.f_hat, self.data)
+        dlp = self.noise_model.dlogpdf_df(self.f_hat, self.data, extra_data=self.extra_data)
 
         #Explicit
         #expl_a = np.dot(self.Ki_f, self.Ki_f.T)
@@ -121,20 +121,20 @@ class Laplace(likelihood):
         :rtype: array of derivatives (1 x num_likelihood_params)
         """
         dL_dfhat, I_KW_i = self._shared_gradients_components()
-        dlik_dthetaL, dlik_grad_dthetaL, dlik_hess_dthetaL = self.noise_model._laplace_gradients(self.data, self.f_hat)
+        dlik_dthetaL, dlik_grad_dthetaL, dlik_hess_dthetaL = self.noise_model._laplace_gradients(self.f_hat, self.data, extra_data=self.extra_data)
 
         num_params = len(dlik_dthetaL)
         # make space for one derivative for each likelihood parameter
         dL_dthetaL = np.zeros(num_params)
         for thetaL_i in range(num_params):
             #Explicit
-            dL_dthetaL_exp = ( np.sum(dlik_dthetaL[thetaL_i])
+            dL_dthetaL_exp = ( np.sum(dlik_dthetaL[:, thetaL_i])
                              #- 0.5*np.trace(mdot(self.Ki_W_i, (self.K, np.diagflat(dlik_hess_dthetaL[thetaL_i]))))
-                             + np.dot(0.5*np.diag(self.Ki_W_i)[:,None].T, dlik_hess_dthetaL[thetaL_i])
+                             + np.dot(0.5*np.diag(self.Ki_W_i)[:,None].T, dlik_hess_dthetaL[:, thetaL_i])
                              )
 
             #Implicit
-            dfhat_dthetaL = mdot(I_KW_i, self.K, dlik_grad_dthetaL[thetaL_i])
+            dfhat_dthetaL = mdot(I_KW_i, self.K, dlik_grad_dthetaL[:, thetaL_i])
             dL_dthetaL_imp = np.dot(dL_dfhat, dfhat_dthetaL)
             dL_dthetaL[thetaL_i] = dL_dthetaL_exp + dL_dthetaL_imp
 
diff --git a/GPy/likelihoods/noise_models/gaussian_noise.py b/GPy/likelihoods/noise_models/gaussian_noise.py
index 7b2e1a85..8bce30b7 100644
--- a/GPy/likelihoods/noise_models/gaussian_noise.py
+++ b/GPy/likelihoods/noise_models/gaussian_noise.py
@@ -36,18 +36,6 @@ class Gaussian(NoiseDistribution):
         #self.ln_det_K = np.sum(np.log(np.diag(self.covariance_matrix)))
         self.ln_det_K = self.N*np.log(self.variance)
 
-    def _laplace_gradients(self, y, f, extra_data=None):
-        #must be listed in same order as 'get_param_names'
-        derivs = ([-self._dnlog_mass_dvar(f, y, extra_data=extra_data)],
-                  [-self._dnlog_mass_dgp_dvar(f, y, extra_data=extra_data)],
-                  [-self._d2nlog_mass_dgp2_dvar(f, y, extra_data=extra_data)]
-                 ) # lists as we might learn many parameters
-        # ensure we have gradients for every parameter we want to optimize
-        assert len(derivs[0]) == len(self._get_param_names())
-        assert len(derivs[1]) == len(self._get_param_names())
-        assert len(derivs[2]) == len(self._get_param_names())
-        return derivs
-
     def _gradients(self,partial):
         return np.zeros(1)
         #return np.sum(partial)
@@ -106,9 +94,9 @@ class Gaussian(NoiseDistribution):
                             rederivate the derivative without doing the chain and put in logpdf, dlogpdf_dlink or\
                             its derivatives")
 
-    def logpdf(self, link_f, y, extra_data=None):
+    def logpdf_link(self, link_f, y, extra_data=None):
         """
-        Log likelihood function
+        Log likelihood function given link(f)
 
         .. math::
             \\ln p(y_{i}|\\lambda(f_{i})) = -\\frac{N \\ln 2\\pi}{2} - \\frac{\\ln |K|}{2} - \\frac{(y_{i} - \\lambda(f_{i}))^{T}\\sigma^{-2}(y_{i} - \\lambda(f_{i}))}{2}
@@ -187,7 +175,7 @@ class Gaussian(NoiseDistribution):
         d3logpdf_dlink3 = np.diagonal(0*self.I)[:, None] # FIXME: CAREFUL THIS MAY NOT WORK WITH MULTIDIMENSIONS?
         return d3logpdf_dlink3
 
-    def dlogpdf_dvar(self, link_f, y, extra_data=None):
+    def dlogpdf_link_dvar(self, link_f, y, extra_data=None):
         """
         Gradient of the negative log-likelihood function at y given link(f), w.r.t variance parameter (noise_variance)
 
@@ -248,6 +236,18 @@ class Gaussian(NoiseDistribution):
         d2logpdf_dlink2_dvar = np.diag(s_4*self.I)[:, None]
         return d2logpdf_dlink2_dvar
 
+    def dlogpdf_link_dtheta(self, f, y, extra_data=None):
+        # The variance gradient is the only entry; nest it twice so axis 1 indexes the parameter
+        return np.asarray([[self.dlogpdf_link_dvar(f, y, extra_data=extra_data)]])
+
+    def dlogpdf_dlink_dtheta(self, f, y, extra_data=None):
+        # The parameter set exposed here is just the variance: forward to dlogpdf_dlink_dvar unchanged
+        return self.dlogpdf_dlink_dvar(f, y, extra_data=extra_data)
+
+    def d2logpdf_dlink2_dtheta(self, f, y, extra_data=None):
+        # The parameter set exposed here is just the variance: forward to d2logpdf_dlink2_dvar unchanged
+        return self.d2logpdf_dlink2_dvar(f, y, extra_data=extra_data)
+
     def _mean(self,gp):
         """
         Expected value of y under the Mass (or density) function p(y|f)
diff --git a/GPy/likelihoods/noise_models/noise_distributions.py b/GPy/likelihoods/noise_models/noise_distributions.py
index 29b71795..6b36f42b 100644
--- a/GPy/likelihoods/noise_models/noise_distributions.py
+++ b/GPy/likelihoods/noise_models/noise_distributions.py
@@ -9,6 +9,7 @@ import pylab as pb
 from GPy.util.plot import gpplot
 from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
 import gp_transformations
+from GPy.util.misc import chain_1, chain_2, chain_3
 
 
 class NoiseDistribution(object):
@@ -398,6 +399,89 @@ class NoiseDistribution(object):
         """
         return sp.optimize.fmin_ncg(self._nlog_joint_predictive_scaled,x0=(mu,self.gp_link.transf(mu)),fprime=self._gradient_nlog_joint_predictive,fhess=self._hessian_nlog_joint_predictive,args=(mu,sigma),disp=False)
 
+    def logpdf(self, f, y, extra_data=None):
+        """
+        Log likelihood of y given the latent f: f is transformed by the link function and the
+        result, not f itself, is handed to logpdf_link; y and extra_data are passed through.
+        """
+        return self.logpdf_link(self.gp_link.transf(f), y, extra_data=extra_data)
+
+    def dlogpdf_df(self, f, y, extra_data=None):
+        """
+        First derivative of the log likelihood w.r.t. the latent f: dlogpdf_dlink evaluated at
+        link(f), combined with the link derivative dtransf_df(f) by chain_1.
+        """
+        link = self.gp_link
+        grad_wrt_link = self.dlogpdf_dlink(link.transf(f), y, extra_data=extra_data)
+        return chain_1(grad_wrt_link, link.dtransf_df(f))
+
+    def d2logpdf_df2(self, f, y, extra_data=None):
+        """
+        Second derivative of the log likelihood w.r.t. the latent f: the first and second
+        derivatives of the log pdf at link(f) and of the link at f are combined by chain_2.
+        """
+        link_f = self.gp_link.transf(f)
+        return chain_2(self.d2logpdf_dlink2(link_f, y, extra_data=extra_data),
+                       self.gp_link.dtransf_df(f),
+                       self.dlogpdf_dlink(link_f, y, extra_data=extra_data),
+                       self.gp_link.d2transf_df2(f))
+
+    def d3logpdf_df3(self, f, y, extra_data=None):
+        """
+        Third derivative of the log likelihood w.r.t. the latent f: the first three derivatives
+        of the log pdf at link(f) and of the link at f are combined by chain_3.
+        """
+        link_f = self.gp_link.transf(f)
+        return chain_3(self.d3logpdf_dlink3(link_f, y, extra_data=extra_data),
+                       self.gp_link.dtransf_df(f),
+                       self.d2logpdf_dlink2(link_f, y, extra_data=extra_data),
+                       self.gp_link.d2transf_df2(f),
+                       self.dlogpdf_dlink(link_f, y, extra_data=extra_data),
+                       self.gp_link.d3transf_df3(f))
+
+    def dlogpdf_dtheta(self, f, y, extra_data=None):
+        """Derivative of the log likelihood w.r.t. the likelihood parameters, evaluated at link(f)"""
+        return self.dlogpdf_link_dtheta(self.gp_link.transf(f), y, extra_data=extra_data)
+
+    def dlogpdf_df_dtheta(self, f, y, extra_data=None):
+        """dlogpdf_dlink_dtheta at link(f), combined with the link derivative dtransf_df(f) by chain_1"""
+        link_f, link_grad = self.gp_link.transf(f), self.gp_link.dtransf_df(f)
+        mixed_deriv = self.dlogpdf_dlink_dtheta(link_f, y, extra_data=extra_data)
+        return chain_1(mixed_deriv, link_grad)
+
+    def d2logpdf_df2_dtheta(self, f, y, extra_data=None):
+        """
+        Parameter derivatives of dlogpdf_dlink and d2logpdf_dlink2 at link(f), combined by chain_2
+        """
+        link = self.gp_link
+        link_f, dlink_df, d2link_df2 = link.transf(f), link.dtransf_df(f), link.d2transf_df2(f)
+        hess_dtheta = self.d2logpdf_dlink2_dtheta(link_f, y, extra_data=extra_data)
+        return chain_2(hess_dtheta, dlink_df, self.dlogpdf_dlink_dtheta(link_f, y, extra_data=extra_data), d2link_df2)
+
+    def _laplace_gradients(self, f, y, extra_data=None):
+        """
+        Collect the likelihood-parameter derivatives returned to the Laplace code.
+
+        Returns a 3-tuple, each entry evaluated at the latent f (the link
+        function is applied inside the methods called below):
+
+        * dlogpdf_dtheta: result of self.dlogpdf_dtheta
+        * dlogpdf_df_dtheta: result of self.dlogpdf_df_dtheta
+        * d2logpdf_df2_dtheta: result of self.d2logpdf_df2_dtheta
+
+        Axis 1 of every returned array must have one entry per likelihood parameter.
+        """
+
+        dlogpdf_dtheta = self.dlogpdf_dtheta(f, y, extra_data=extra_data)
+        dlogpdf_df_dtheta = self.dlogpdf_df_dtheta(f, y, extra_data=extra_data)
+        d2logpdf_df2_dtheta = self.d2logpdf_df2_dtheta(f, y, extra_data=extra_data)
+
+        #One column (axis 1) per likelihood parameter. Must be listed in same order as '_get_param_names'
+        # ensure we have gradients for every parameter we want to optimize
+        assert dlogpdf_dtheta.shape[1] == len(self._get_param_names())
+        assert dlogpdf_df_dtheta.shape[1] == len(self._get_param_names())
+        assert d2logpdf_df2_dtheta.shape[1] == len(self._get_param_names())
+        return dlogpdf_dtheta, dlogpdf_df_dtheta, d2logpdf_df2_dtheta
+
     def predictive_values(self,mu,var):
         """
         Compute  mean, variance and conficence interval (percentiles 5 and 95) of the  prediction.
@@ -433,3 +517,5 @@ class NoiseDistribution(object):
         """
         pass
 
+
+
diff --git a/GPy/likelihoods/noise_models/student_t_noise.py b/GPy/likelihoods/noise_models/student_t_noise.py
index dcd41fda..0e881a8d 100644
--- a/GPy/likelihoods/noise_models/student_t_noise.py
+++ b/GPy/likelihoods/noise_models/student_t_noise.py
@@ -40,27 +40,9 @@ class StudentT(NoiseDistribution):
     def variance(self, extra_data=None):
         return (self.v / float(self.v - 2)) * self.sigma2
 
-    def _nlog_mass(self, link_f, y, extra_data=None):
-        NotImplementedError("Deprecated, now doing chain in likelihood.py for link function evaluation\
-                            Please negate your function and use logpdf in noise_model.py, if implementing a likelihood\
-                            rederivate the derivative without doing the chain and put in logpdf, dlogpdf_dlink or\
-                            its derivatives")
-
-    def _dnlog_mass_dgp(self, link_f, y, extra_data=None):
-        NotImplementedError("Deprecated, now doing chain in likelihood.py for link function evaluation\
-                            Please negate your function and use logpdf in noise_model.py, if implementing a likelihood\
-                            rederivate the derivative without doing the chain and put in logpdf, dlogpdf_dlink or\
-                            its derivatives")
-
-    def _d2nlog_mass_dgp2(self, link_f, y, extra_data=None):
-        NotImplementedError("Deprecated, now doing chain in likelihood.py for link function evaluation\
-                            Please negate your function and use logpdf in noise_model.py, if implementing a likelihood\
-                            rederivate the derivative without doing the chain and put in logpdf, dlogpdf_dlink or\
-                            its derivatives")
-
-    def logpdf(self, link_f, y, extra_data=None):
+    def logpdf_link(self, link_f, y, extra_data=None):
         """
-        Log Likelihood Function
+        Log Likelihood Function given link(f)
 
         .. math::
             \\ln p(y_{i}|f_{i}) = \\ln \\Gamma(\\frac{v+1}{2}) - \\ln \\Gamma(\\frac{v}{2})\\sqrt{v \\pi}\sigma - \\frac{v+1}{2}\\ln (1 + \\frac{1}{v}\\left(\\frac{y_{i} - f_{i}}{\\sigma}\\right)^2
@@ -151,7 +133,7 @@ class StudentT(NoiseDistribution):
                     )
         return d3lik_dlink3
 
-    def dlogpdf_dvar(self, link_f, y, extra_data=None):
+    def dlogpdf_link_dvar(self, link_f, y, extra_data=None):
         """
         Gradient of the log-likelihood function at y given f, w.r.t variance parameter (t_noise)
 
@@ -169,7 +151,6 @@ class StudentT(NoiseDistribution):
         assert y.shape == link_f.shape
         e = y - link_f
         dlogpdf_dvar = self.v*(e**2 - self.sigma2)/(2*self.sigma2*(self.sigma2*self.v + e**2))
-        #FIXME: Careful as this hasn't been chained with dlink_var, not sure if we want link functions on our parameters?! Shouldn't need them with constraints
         return np.sum(dlogpdf_dvar)
 
     def dlogpdf_dlink_dvar(self, link_f, y, extra_data=None):
@@ -214,17 +195,17 @@ class StudentT(NoiseDistribution):
                            )
         return d2logpdf_dlink2_dvar
 
-    def _laplace_gradients(self, y, f, extra_data=None):
-        #must be listed in same order as 'get_param_names'
-        derivs = ([self.dlogpdf_dvar(f, y, extra_data=extra_data)],
-                  [self.dlogpdf_dlink_dvar(f, y, extra_data=extra_data)],
-                  [self.d2logpdf_dlink2_dvar(f, y, extra_data=extra_data)]
-                 ) # lists as we might learn many parameters
-        # ensure we have gradients for every parameter we want to optimize
-        assert len(derivs[0]) == len(self._get_param_names())
-        assert len(derivs[1]) == len(self._get_param_names())
-        assert len(derivs[2]) == len(self._get_param_names())
-        return derivs
+    def dlogpdf_link_dtheta(self, f, y, extra_data=None):
+        # dlogpdf_link_dvar returns a summed value; nest it twice so axis 1 indexes the parameter
+        return np.asarray([[self.dlogpdf_link_dvar(f, y, extra_data=extra_data)]])
+
+    def dlogpdf_dlink_dtheta(self, f, y, extra_data=None):
+        # The parameter set exposed here is just the variance: forward to dlogpdf_dlink_dvar unchanged
+        return self.dlogpdf_dlink_dvar(f, y, extra_data=extra_data)
+
+    def d2logpdf_dlink2_dtheta(self, f, y, extra_data=None):
+        # The parameter set exposed here is just the variance: forward to d2logpdf_dlink2_dvar unchanged
+        return self.d2logpdf_dlink2_dvar(f, y, extra_data=extra_data)
 
     def _predictive_variance_analytical(self, mu, sigma, predictive_mean=None):
         """
diff --git a/GPy/testing/laplace_tests.py b/GPy/testing/laplace_tests.py
index 936241b1..dbdd34f3 100644
--- a/GPy/testing/laplace_tests.py
+++ b/GPy/testing/laplace_tests.py
@@ -80,7 +80,7 @@ class LaplaceTests(unittest.TestCase):
         self.gauss = GPy.likelihoods.gaussian(variance=self.var, D=self.D, N=self.N)
 
         #Make a bigger step as lower bound can be quite curved
-        self.step = 1e-4
+        self.step = 1e-3
 
     def tearDown(self):
         self.stu_t = None
@@ -97,7 +97,6 @@ class LaplaceTests(unittest.TestCase):
 
 
     """ dGauss_df's """
-    @unittest.skip("Not Implemented Yet")
     def test_gaussian_dlogpdf_df(self):
         #FIXME: Needs non-identity Link function
         print "\n{}".format(inspect.stack()[0][3])
@@ -108,7 +107,6 @@ class LaplaceTests(unittest.TestCase):
         grad.checkgrad(verbose=1)
         self.assertTrue(grad.checkgrad())
 
-    @unittest.skip("Not Implemented Yet")
     def test_gaussian_d2logpdf_df2(self):
         #FIXME: Needs non-identity Link function
         print "\n{}".format(inspect.stack()[0][3])
@@ -119,7 +117,6 @@ class LaplaceTests(unittest.TestCase):
         grad.checkgrad(verbose=1)
         self.assertTrue(grad.checkgrad())
 
-    @unittest.skip("Not Implemented Yet")
     def test_gaussian_d3logpdf_df3(self):
         #FIXME: Needs non-identity Link function
         print "\n{}".format(inspect.stack()[0][3])
@@ -130,22 +127,20 @@ class LaplaceTests(unittest.TestCase):
         grad.checkgrad(verbose=1)
         self.assertTrue(grad.checkgrad())
 
-    @unittest.skip("Not Implemented Yet")
     def test_gaussian_dlogpdf_df_dvar(self):
         #FIXME: Needs non-identity Link function
         print "\n{}".format(inspect.stack()[0][3])
         self.assertTrue(
-                dparam_checkgrad(self.gauss.dlogpdf_df, self.gauss.dlogpdf_df_dvar,
+                dparam_checkgrad(self.gauss.dlogpdf_df, self.gauss.dlogpdf_df_dtheta,
                     [self.var], args=(self.f, self.Y), constrain_positive=True,
                     randomize=False, verbose=True)
                 )
 
-    @unittest.skip("Not Implemented Yet")
     def test_gaussian_d2logpdf2_df2_dvar(self):
         #FIXME: Needs non-identity Link function
         print "\n{}".format(inspect.stack()[0][3])
         self.assertTrue(
-                dparam_checkgrad(self.gauss.d2logpdf_df2, self.gauss.d2logpdf_df2_dvar,
+                dparam_checkgrad(self.gauss.d2logpdf_df2, self.gauss.d2logpdf_df2_dtheta,
                     [self.var], args=(self.f, self.Y), constrain_positive=True,
                     randomize=False, verbose=True)
                 )
@@ -182,7 +177,7 @@ class LaplaceTests(unittest.TestCase):
     def test_gaussian_dlogpdf_dvar(self):
         print "\n{}".format(inspect.stack()[0][3])
         self.assertTrue(
-                dparam_checkgrad(self.gauss.logpdf, self.gauss.dlogpdf_dvar,
+                dparam_checkgrad(self.gauss.logpdf, self.gauss.dlogpdf_dtheta,
                     [self.var], args=(self.f, self.Y), constrain_positive=True,
                     randomize=False, verbose=True)
                 )
@@ -190,7 +185,7 @@ class LaplaceTests(unittest.TestCase):
     def test_gaussian_dlogpdf_dlink_dvar(self):
         print "\n{}".format(inspect.stack()[0][3])
         self.assertTrue(
-                dparam_checkgrad(self.gauss.dlogpdf_dlink, self.gauss.dlogpdf_dlink_dvar,
+                dparam_checkgrad(self.gauss.dlogpdf_dlink, self.gauss.dlogpdf_dlink_dtheta,
                     [self.var], args=(self.f, self.Y), constrain_positive=True,
                     randomize=False, verbose=True)
                 )
@@ -198,7 +193,7 @@ class LaplaceTests(unittest.TestCase):
     def test_gaussian_d2logpdf2_dlink2_dvar(self):
         print "\n{}".format(inspect.stack()[0][3])
         self.assertTrue(
-                dparam_checkgrad(self.gauss.d2logpdf_dlink2, self.gauss.d2logpdf_dlink2_dvar,
+                dparam_checkgrad(self.gauss.d2logpdf_dlink2, self.gauss.d2logpdf_dlink2_dtheta,
                     [self.var], args=(self.f, self.Y), constrain_positive=True,
                     randomize=False, verbose=True)
                 )
@@ -228,7 +223,6 @@ class LaplaceTests(unittest.TestCase):
         self.assertTrue(grad.checkgrad())
 
     """ dStudentT_df's """
-    @unittest.skip("Not Implemented Yet")
     def test_studentt_dlogpdf_df(self):
         #FIXME: Needs non-identity Link function
         print "\n{}".format(inspect.stack()[0][3])
@@ -239,7 +233,6 @@ class LaplaceTests(unittest.TestCase):
         grad.checkgrad(verbose=1)
         self.assertTrue(grad.checkgrad())
 
-    @unittest.skip("Not Implemented Yet")
     def test_studentt_d2logpdf_df2(self):
         #FIXME: Needs non-identity Link function
         print "\n{}".format(inspect.stack()[0][3])
@@ -250,34 +243,31 @@ class LaplaceTests(unittest.TestCase):
         grad.checkgrad(verbose=1)
         self.assertTrue(grad.checkgrad())
 
-    @unittest.skip("Not Implemented Yet")
     def test_studentt_d3lik_d3f(self):
         #FIXME: Needs non-identity Link function
         print "\n{}".format(inspect.stack()[0][3])
-        d2logpdf_df2 = functools.partial(self.stu_t.d2logpdf_d2f, y=self.Y)
-        d3logpdf_df3 = functools.partial(self.stu_t.d3logpdf_d3f, y=self.Y)
+        d2logpdf_df2 = functools.partial(self.stu_t.d2logpdf_df2, y=self.Y)
+        d3logpdf_df3 = functools.partial(self.stu_t.d3logpdf_df3, y=self.Y)
         grad = GradientChecker(d2logpdf_df2, d3logpdf_df3, self.f.copy(), 'f')
         grad.randomize()
         grad.checkgrad(verbose=1)
         self.assertTrue(grad.checkgrad())
 
-    @unittest.skip("Not Implemented Yet")
     def test_studentt_dlogpdf_df_dvar(self):
         #FIXME: Needs non-identity Link function
         print "\n{}".format(inspect.stack()[0][3])
         self.assertTrue(
-                dparam_checkgrad(self.stu_t.dlogpdf_df, self.stu_t.dlogpdf_df_dvar,
-                    [self.var], args=(self.Y.copy(), self.f.copy()),
+                dparam_checkgrad(self.stu_t.dlogpdf_df, self.stu_t.dlogpdf_df_dtheta,
+                    [self.var], args=(self.f.copy(), self.Y.copy()),
                     constrain_positive=True, randomize=True, verbose=True)
                 )
 
-    @unittest.skip("Not Implemented Yet")
     def test_studentt_d2logpdf_df2_dvar(self):
         #FIXME: Needs non-identity Link function
         print "\n{}".format(inspect.stack()[0][3])
         self.assertTrue(
-                dparam_checkgrad(self.stu_t.d2logpdf_df2, self.stu_t.d2logpdf_df2_dvar,
-                    [self.var], args=(self.Y.copy(), self.f.copy()),
+                dparam_checkgrad(self.stu_t.d2logpdf_df2, self.stu_t.d2logpdf_df2_dtheta,
+                    [self.var], args=(self.f.copy(), self.Y.copy()),
                     constrain_positive=True, randomize=True, verbose=True)
                 )
 
@@ -312,24 +302,24 @@ class LaplaceTests(unittest.TestCase):
     def test_studentt_dlogpdf_dvar(self):
         print "\n{}".format(inspect.stack()[0][3])
         self.assertTrue(
-                dparam_checkgrad(self.stu_t.logpdf, self.stu_t.dlogpdf_dvar,
-                    [self.var], args=(self.Y.copy(), self.f.copy()),
+                dparam_checkgrad(self.stu_t.logpdf, self.stu_t.dlogpdf_dtheta,
+                    [self.var], args=(self.f.copy(), self.Y.copy()),
                     constrain_positive=True, randomize=True, verbose=True)
                 )
 
     def test_studentt_dlogpdf_dlink_dvar(self):
         print "\n{}".format(inspect.stack()[0][3])
         self.assertTrue(
-                dparam_checkgrad(self.stu_t.dlogpdf_dlink, self.stu_t.dlogpdf_dlink_dvar,
-                    [self.var], args=(self.Y.copy(), self.f.copy()),
+                dparam_checkgrad(self.stu_t.dlogpdf_dlink, self.stu_t.dlogpdf_dlink_dtheta,
+                    [self.var], args=(self.f.copy(), self.Y.copy()),
                     constrain_positive=True, randomize=True, verbose=True)
                 )
 
     def test_studentt_d2logpdf_dlink2_dvar(self):
         print "\n{}".format(inspect.stack()[0][3])
         self.assertTrue(
-                dparam_checkgrad(self.stu_t.d2logpdf_dlink2, self.stu_t.d2logpdf_dlink2_dvar,
-                    [self.var], args=(self.Y.copy(), self.f.copy()),
+                dparam_checkgrad(self.stu_t.d2logpdf_dlink2, self.stu_t.d2logpdf_dlink2_dtheta,
+                    [self.var], args=(self.f.copy(), self.Y.copy()),
                     constrain_positive=True, randomize=True, verbose=True)
                 )
 
@@ -388,7 +378,9 @@ class LaplaceTests(unittest.TestCase):
         m.constrain_positive('t_noise')
         m.constrain_fixed('white', white_var)
         m['t_noise'] = 0.01
+        m.randomize()
         m.checkgrad(verbose=1)
+        print m
         self.assertTrue(m.checkgrad(step=self.step))
 
 if __name__ == "__main__":
diff --git a/GPy/util/misc.py b/GPy/util/misc.py
index 5866ecf9..885f9e83 100644
--- a/GPy/util/misc.py
+++ b/GPy/util/misc.py
@@ -4,6 +4,33 @@
 import numpy as np
 from scipy import weave
 
+def chain_1(df_dg, dg_dx):
+    """
+    Chain rule for the first derivative of f(g(x)) w.r.t. x: returns df_dg * dg_dx
+
+    .. math::
+        \\frac{d(f \\circ g)}{dx} = \\frac{df}{dg} \\frac{dg}{dx}
+    """
+    return df_dg * dg_dx
+
+def chain_2(d2f_dg2, dg_dx, df_dg, d2g_dx2):
+    """
+    Chain rule for the second derivative of f(g(x)) w.r.t. x, built from the derivatives of f and g
+
+    .. math::
+        \\frac{d^{2}(f \\circ g)}{dx^{2}} = \\frac{d^{2}f}{dg^{2}}(\\frac{dg}{dx})^{2} + \\frac{df}{dg}\\frac{d^{2}g}{dx^{2}}
+    """
+    return d2f_dg2*(dg_dx**2) + df_dg*d2g_dx2
+
+def chain_3(d3f_dg3, dg_dx, d2f_dg2, d2g_dx2, df_dg, d3g_dx3):
+    """
+    Chain rule for the third derivative of f(g(x)) w.r.t. x, built from the derivatives of f and g
+
+    .. math::
+        \\frac{d^{3}(f \\circ g)}{dx^{3}} = \\frac{d^{3}f}{dg^{3}}(\\frac{dg}{dx})^{3} + 3\\frac{d^{2}f}{dg^{2}}\\frac{dg}{dx}\\frac{d^{2}g}{dx^{2}} + \\frac{df}{dg}\\frac{d^{3}g}{dx^{3}}
+    """
+    return d3f_dg3*(dg_dx**3) + 3*d2f_dg2*dg_dx*d2g_dx2 + df_dg*d3g_dx3
+
 def opt_wrapper(m, **kwargs):
     """
     This function just wraps the optimization procedure of a GPy