From 787a038401ee959fbbd8bfe354c84c1d4cbd56fa Mon Sep 17 00:00:00 2001 From: Alan Saul Date: Tue, 14 May 2013 16:23:18 +0100 Subject: [PATCH] Still getting closer to grads for likelihood --- GPy/examples/laplace_approximations.py | 4 ++-- GPy/likelihoods/Laplace.py | 16 ++++++---------- GPy/likelihoods/likelihood_functions.py | 4 ++-- 3 files changed, 10 insertions(+), 14 deletions(-) diff --git a/GPy/examples/laplace_approximations.py b/GPy/examples/laplace_approximations.py index 2054881c..eb725b53 100644 --- a/GPy/examples/laplace_approximations.py +++ b/GPy/examples/laplace_approximations.py @@ -95,10 +95,10 @@ def debug_student_t_noise_approx(): m.constrain_positive('t_noi') #m.constrain_fixed('t_noise_variance', real_sd) m.update_likelihood_approximation() - #m.optimize('lbfgsb', messages=True, callback=m._update_params_callback) - m.optimize('scg', messages=True) print(m) return m + #m.optimize('lbfgsb', messages=True, callback=m._update_params_callback) + m.optimize('scg', messages=True) if plot: plt.suptitle('Student-t likelihood') plt.subplot(132) diff --git a/GPy/likelihoods/Laplace.py b/GPy/likelihoods/Laplace.py index 5b3e8f43..2af51f2b 100644 --- a/GPy/likelihoods/Laplace.py +++ b/GPy/likelihoods/Laplace.py @@ -201,24 +201,22 @@ class Laplace(likelihood): dL_dytil, dytil_dfhat = self._shared_gradients_components() #dfhat_dthetaL, dSigmai_dthetaL = self.likelihood_function._gradients(self.data, self.f_hat, self.extra_data) #FIXME: Shouldn't this have a implicit component aswell? - dlikelihood_dthetaL, d2likelihood_dthetaL = self.likelihood_function._gradients(self.data, self.f_hat, self.extra_data) #FIXME: Shouldn't this have a implicit component aswell? + dlikelihoodgrad_dthetaL, d2likelihood_dthetaL = self.likelihood_function._gradients(self.data, self.f_hat, self.extra_data) #FIXME: Shouldn't this have a implicit component aswell? dlikelihood_dfhat = self.likelihood_function.link_grad(self.data, self.f_hat, self.extra_data) - #dfhat_dthetaL_cyclic = 0 #FIXME: what is this? how can dfhat_dthetaL be used in the value of itself? - #dlikelihood_dthetaL_implicit = np.dot(dlikelihood_dfhat, dfhat_dthetaL_cyclic) # may need a sum over f - #dfhat_dthetaL = np.dot(self.K, (dlikelihood_dthetaL_explicit + dlikelihood_dthetaL_implicit)[:, None]) #KW_I_i, _, _, _ = pdinv(np.dot(self.K, self.W) + np.eye(self.N)) KW_I_i = self.Bi # could use self.B_chol?? - dfhat_dthetaL = mdot(KW_I_i, (self.K, dlikelihood_dfhat)) + dfhat_dthetaL = mdot(KW_I_i, (self.K, dlikelihoodgrad_dthetaL)) + #dfhat_dthetaL = np.zeros(dfhat_dthetaL.shape)[:, None] dytil_dthetaL = np.dot(dytil_dfhat, dfhat_dthetaL) #FIXME: Careful the -D*0.5 in dL_d_K_sigma might need to be -0.5? - dL_dSigma = partial #Is actually but can't rename it because of naming convention... dL_d_K_Sigma + dL_dSigma = np.diagflat(partial) #Is actually but can't rename it because of naming convention... dL_d_K_Sigma Wi = np.diagonal(self.Sigma_tilde) #Convenience #-1 as we are looking at W which is -1*d2log p(y|f) #Can just hadamard product as diagonal matricies multiplied are just multiplying elements - dSigma_dthetaL_explicit = np.diagflat(-(Wi*(-1*d2likelihood_dthetaL)*Wi)) + dSigma_dthetaL_explicit = np.diagflat(-1*(Wi*(-1*d2likelihood_dthetaL)*Wi)) d3likelihood_d3fhat = self.likelihood_function.d3link(self.data, self.f_hat, self.extra_data) dWi_dfhat = np.diagflat(-1*Wi*(-1*d3likelihood_d3fhat)*Wi) @@ -242,10 +240,8 @@ class Laplace(likelihood): #dL_dthetaL = 0 + np.dot(dL_dytil, dytil_dthetaL)# + np.dot(dL_dSigma, dSigma_dthetaL) dL_dthetaL_via_ytil = np.sum(np.dot(dL_dytil, dytil_dthetaL), axis=0) - dL_dthetaL_via_Sigma = np.sum(np.dot(dL_dSigma[:, None].T, dSigma_dthetaL), axis=0) + dL_dthetaL_via_Sigma = np.sum(np.sum(np.dot(dL_dSigma, dSigma_dthetaL), axis=0)) dL_dthetaL = dL_dthetaL_via_ytil + dL_dthetaL_via_Sigma - dL_dthetaL_via_Sigma_old = np.sum(np.dot(dL_dSigma, dSigma_dthetaL), axis=0) - import ipdb; ipdb.set_trace() ### XXX BREAKPOINT return np.squeeze(dL_dthetaL) #should be array of length *params-being optimized*, for student t just optimising 1 parameter, this is (1,) diff --git a/GPy/likelihoods/likelihood_functions.py b/GPy/likelihoods/likelihood_functions.py index 6eef9f33..1a9dac75 100644 --- a/GPy/likelihoods/likelihood_functions.py +++ b/GPy/likelihoods/likelihood_functions.py @@ -256,7 +256,7 @@ class student_t(likelihood_function): f = np.squeeze(f) assert y.shape == f.shape e = y - f - d3link_d3f = ( (2*(self.v + 1)*(e**3 - 3*(self.sigma**2)*self.v*e)) + d3link_d3f = ( (2*(self.v + 1)*(-1*e)*(e**2 - 3*(self.sigma**2)*self.v)) / ((e**2 + (self.sigma**2)*self.v)**3) ) return np.squeeze(d3link_d3f) @@ -286,7 +286,7 @@ class student_t(likelihood_function): f = np.squeeze(f) assert y.shape == f.shape e = y - f - grad_sigma = ( (2*self.sigma*self.v*(self.v + 1)*e) + grad_sigma = ( (-2*self.sigma*self.v*(self.v + 1)*e) / ((self.v*(self.sigma**2) + e**2)**2) ) return np.squeeze(grad_sigma)