Still getting closer to grads for likelihood

2026-05-18 13:55:14 +02:00 · 2013-05-14 16:23:18 +01:00 · 2013-05-14 16:23:18 +01:00 · 787a038401
commit 787a038401
parent 5472c5c6ba
3 changed files with 10 additions and 14 deletions
--- a/GPy/examples/laplace_approximations.py
+++ b/GPy/examples/laplace_approximations.py
@ -95,10 +95,10 @@ def debug_student_t_noise_approx():
    m.constrain_positive('t_noi')
    #m.constrain_fixed('t_noise_variance', real_sd)
    m.update_likelihood_approximation()
-    #m.optimize('lbfgsb', messages=True, callback=m._update_params_callback)
-    m.optimize('scg', messages=True)
    print(m)
    return m
+    #m.optimize('lbfgsb', messages=True, callback=m._update_params_callback)
+    m.optimize('scg', messages=True)
    if plot:
        plt.suptitle('Student-t likelihood')
        plt.subplot(132)
--- a/GPy/likelihoods/Laplace.py
+++ b/GPy/likelihoods/Laplace.py
@ -201,24 +201,22 @@ class Laplace(likelihood):
        dL_dytil, dytil_dfhat = self._shared_gradients_components()
        #dfhat_dthetaL, dSigmai_dthetaL = self.likelihood_function._gradients(self.data, self.f_hat, self.extra_data) #FIXME: Shouldn't this have a implicit component aswell?

-        dlikelihood_dthetaL, d2likelihood_dthetaL = self.likelihood_function._gradients(self.data, self.f_hat, self.extra_data) #FIXME: Shouldn't this have a implicit component aswell?
+        dlikelihoodgrad_dthetaL, d2likelihood_dthetaL = self.likelihood_function._gradients(self.data, self.f_hat, self.extra_data) #FIXME: Shouldn't this have a implicit component aswell?
        dlikelihood_dfhat = self.likelihood_function.link_grad(self.data, self.f_hat, self.extra_data)
-        #dfhat_dthetaL_cyclic = 0 #FIXME: what is this? how can dfhat_dthetaL be used in the value of itself?
-        #dlikelihood_dthetaL_implicit = np.dot(dlikelihood_dfhat, dfhat_dthetaL_cyclic) # may need a sum over f
-        #dfhat_dthetaL = np.dot(self.K, (dlikelihood_dthetaL_explicit + dlikelihood_dthetaL_implicit)[:, None])
        #KW_I_i, _, _, _ = pdinv(np.dot(self.K, self.W) + np.eye(self.N))
        KW_I_i = self.Bi # could use self.B_chol??
-        dfhat_dthetaL = mdot(KW_I_i, (self.K, dlikelihood_dfhat))
+        dfhat_dthetaL = mdot(KW_I_i, (self.K, dlikelihoodgrad_dthetaL))
+        #dfhat_dthetaL = np.zeros(dfhat_dthetaL.shape)[:, None]

        dytil_dthetaL = np.dot(dytil_dfhat, dfhat_dthetaL)

        #FIXME: Careful the -D*0.5 in dL_d_K_sigma might need to be -0.5?
-        dL_dSigma = partial #Is actually but can't rename it because of naming convention... dL_d_K_Sigma
+        dL_dSigma = np.diagflat(partial) #Is actually but can't rename it because of naming convention... dL_d_K_Sigma

        Wi = np.diagonal(self.Sigma_tilde) #Convenience
        #-1 as we are looking at W which is -1*d2log p(y|f)
        #Can just hadamard product as diagonal matricies multiplied are just multiplying elements
-        dSigma_dthetaL_explicit = np.diagflat(-(Wi*(-1*d2likelihood_dthetaL)*Wi))
+        dSigma_dthetaL_explicit = np.diagflat(-1*(Wi*(-1*d2likelihood_dthetaL)*Wi))

        d3likelihood_d3fhat = self.likelihood_function.d3link(self.data, self.f_hat, self.extra_data)
        dWi_dfhat = np.diagflat(-1*Wi*(-1*d3likelihood_d3fhat)*Wi)
@ -242,10 +240,8 @@ class Laplace(likelihood):
        #dL_dthetaL = 0 + np.dot(dL_dytil, dytil_dthetaL)# + np.dot(dL_dSigma, dSigma_dthetaL)

        dL_dthetaL_via_ytil = np.sum(np.dot(dL_dytil, dytil_dthetaL), axis=0)
-        dL_dthetaL_via_Sigma = np.sum(np.dot(dL_dSigma[:, None].T, dSigma_dthetaL), axis=0)
+        dL_dthetaL_via_Sigma = np.sum(np.sum(np.dot(dL_dSigma, dSigma_dthetaL), axis=0))
        dL_dthetaL = dL_dthetaL_via_ytil + dL_dthetaL_via_Sigma
-        dL_dthetaL_via_Sigma_old = np.sum(np.dot(dL_dSigma, dSigma_dthetaL), axis=0)
-        import ipdb; ipdb.set_trace() ### XXX BREAKPOINT

        return np.squeeze(dL_dthetaL) #should be array of length *params-being optimized*, for student t just optimising 1 parameter, this is (1,)

--- a/GPy/likelihoods/likelihood_functions.py
+++ b/GPy/likelihoods/likelihood_functions.py
@ -256,7 +256,7 @@ class student_t(likelihood_function):
        f = np.squeeze(f)
        assert y.shape == f.shape
        e = y - f
-        d3link_d3f = (  (2*(self.v + 1)*(e**3 - 3*(self.sigma**2)*self.v*e))
+        d3link_d3f = (  (2*(self.v + 1)*(-1*e)*(e**2 - 3*(self.sigma**2)*self.v))
                      / ((e**2 + (self.sigma**2)*self.v)**3)
                     )
        return np.squeeze(d3link_d3f)
@ -286,7 +286,7 @@ class student_t(likelihood_function):
        f = np.squeeze(f)
        assert y.shape == f.shape
        e = y - f
-        grad_sigma = (  (2*self.sigma*self.v*(self.v + 1)*e)
+        grad_sigma = (  (-2*self.sigma*self.v*(self.v + 1)*e)
                      / ((self.v*(self.sigma**2) + e**2)**2)
                     )
        return np.squeeze(grad_sigma)