Still getting closer to grads for likelihood

This commit is contained in:
Alan Saul 2013-05-14 16:23:18 +01:00
parent 5472c5c6ba
commit 787a038401
3 changed files with 10 additions and 14 deletions

View file

@ -95,10 +95,10 @@ def debug_student_t_noise_approx():
m.constrain_positive('t_noi')
#m.constrain_fixed('t_noise_variance', real_sd)
m.update_likelihood_approximation()
#m.optimize('lbfgsb', messages=True, callback=m._update_params_callback)
m.optimize('scg', messages=True)
print(m)
return m
#m.optimize('lbfgsb', messages=True, callback=m._update_params_callback)
m.optimize('scg', messages=True)
if plot:
plt.suptitle('Student-t likelihood')
plt.subplot(132)

View file

@ -201,24 +201,22 @@ class Laplace(likelihood):
dL_dytil, dytil_dfhat = self._shared_gradients_components()
#dfhat_dthetaL, dSigmai_dthetaL = self.likelihood_function._gradients(self.data, self.f_hat, self.extra_data) #FIXME: Shouldn't this have a implicit component aswell?
dlikelihood_dthetaL, d2likelihood_dthetaL = self.likelihood_function._gradients(self.data, self.f_hat, self.extra_data) #FIXME: Shouldn't this have a implicit component aswell?
dlikelihoodgrad_dthetaL, d2likelihood_dthetaL = self.likelihood_function._gradients(self.data, self.f_hat, self.extra_data) #FIXME: Shouldn't this have a implicit component aswell?
dlikelihood_dfhat = self.likelihood_function.link_grad(self.data, self.f_hat, self.extra_data)
#dfhat_dthetaL_cyclic = 0 #FIXME: what is this? how can dfhat_dthetaL be used in the value of itself?
#dlikelihood_dthetaL_implicit = np.dot(dlikelihood_dfhat, dfhat_dthetaL_cyclic) # may need a sum over f
#dfhat_dthetaL = np.dot(self.K, (dlikelihood_dthetaL_explicit + dlikelihood_dthetaL_implicit)[:, None])
#KW_I_i, _, _, _ = pdinv(np.dot(self.K, self.W) + np.eye(self.N))
KW_I_i = self.Bi # could use self.B_chol??
dfhat_dthetaL = mdot(KW_I_i, (self.K, dlikelihood_dfhat))
dfhat_dthetaL = mdot(KW_I_i, (self.K, dlikelihoodgrad_dthetaL))
#dfhat_dthetaL = np.zeros(dfhat_dthetaL.shape)[:, None]
dytil_dthetaL = np.dot(dytil_dfhat, dfhat_dthetaL)
#FIXME: Careful the -D*0.5 in dL_d_K_sigma might need to be -0.5?
dL_dSigma = partial #Is actually but can't rename it because of naming convention... dL_d_K_Sigma
dL_dSigma = np.diagflat(partial) #Is actually but can't rename it because of naming convention... dL_d_K_Sigma
Wi = np.diagonal(self.Sigma_tilde) #Convenience
#-1 as we are looking at W which is -1*d2log p(y|f)
#Can just hadamard product as diagonal matricies multiplied are just multiplying elements
dSigma_dthetaL_explicit = np.diagflat(-(Wi*(-1*d2likelihood_dthetaL)*Wi))
dSigma_dthetaL_explicit = np.diagflat(-1*(Wi*(-1*d2likelihood_dthetaL)*Wi))
d3likelihood_d3fhat = self.likelihood_function.d3link(self.data, self.f_hat, self.extra_data)
dWi_dfhat = np.diagflat(-1*Wi*(-1*d3likelihood_d3fhat)*Wi)
@ -242,10 +240,8 @@ class Laplace(likelihood):
#dL_dthetaL = 0 + np.dot(dL_dytil, dytil_dthetaL)# + np.dot(dL_dSigma, dSigma_dthetaL)
dL_dthetaL_via_ytil = np.sum(np.dot(dL_dytil, dytil_dthetaL), axis=0)
dL_dthetaL_via_Sigma = np.sum(np.dot(dL_dSigma[:, None].T, dSigma_dthetaL), axis=0)
dL_dthetaL_via_Sigma = np.sum(np.sum(np.dot(dL_dSigma, dSigma_dthetaL), axis=0))
dL_dthetaL = dL_dthetaL_via_ytil + dL_dthetaL_via_Sigma
dL_dthetaL_via_Sigma_old = np.sum(np.dot(dL_dSigma, dSigma_dthetaL), axis=0)
import ipdb; ipdb.set_trace() ### XXX BREAKPOINT
return np.squeeze(dL_dthetaL) #should be array of length *params-being optimized*, for student t just optimising 1 parameter, this is (1,)

View file

@ -256,7 +256,7 @@ class student_t(likelihood_function):
f = np.squeeze(f)
assert y.shape == f.shape
e = y - f
d3link_d3f = ( (2*(self.v + 1)*(e**3 - 3*(self.sigma**2)*self.v*e))
d3link_d3f = ( (2*(self.v + 1)*(-1*e)*(e**2 - 3*(self.sigma**2)*self.v))
/ ((e**2 + (self.sigma**2)*self.v)**3)
)
return np.squeeze(d3link_d3f)
@ -286,7 +286,7 @@ class student_t(likelihood_function):
f = np.squeeze(f)
assert y.shape == f.shape
e = y - f
grad_sigma = ( (2*self.sigma*self.v*(self.v + 1)*e)
grad_sigma = ( (-2*self.sigma*self.v*(self.v + 1)*e)
/ ((self.v*(self.sigma**2) + e**2)**2)
)
return np.squeeze(grad_sigma)