mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-05-30 14:35:15 +02:00
Seem to have gradients much closer now
This commit is contained in:
parent
84f12c1079
commit
6c4866662c
4 changed files with 110 additions and 60 deletions
|
|
@ -36,6 +36,7 @@ def timing():
|
||||||
print np.mean(the_is)
|
print np.mean(the_is)
|
||||||
|
|
||||||
def debug_student_t_noise_approx():
|
def debug_student_t_noise_approx():
|
||||||
|
plot = False
|
||||||
real_var = 0.1
|
real_var = 0.1
|
||||||
#Start a function, any function
|
#Start a function, any function
|
||||||
X = np.linspace(0.0, 10.0, 30)[:, None]
|
X = np.linspace(0.0, 10.0, 30)[:, None]
|
||||||
|
|
@ -57,8 +58,6 @@ def debug_student_t_noise_approx():
|
||||||
#Y += noise
|
#Y += noise
|
||||||
|
|
||||||
plt.close('all')
|
plt.close('all')
|
||||||
plt.figure(1)
|
|
||||||
plt.suptitle('Gaussian likelihood')
|
|
||||||
# Kernel object
|
# Kernel object
|
||||||
kernel1 = GPy.kern.rbf(X.shape[1])
|
kernel1 = GPy.kern.rbf(X.shape[1])
|
||||||
kernel2 = kernel1.copy()
|
kernel2 = kernel1.copy()
|
||||||
|
|
@ -75,12 +74,14 @@ def debug_student_t_noise_approx():
|
||||||
m.ensure_default_constraints()
|
m.ensure_default_constraints()
|
||||||
m.optimize()
|
m.optimize()
|
||||||
# plot
|
# plot
|
||||||
plt.subplot(131)
|
if plot:
|
||||||
m.plot()
|
plt.figure(1)
|
||||||
plt.plot(X_full, Y_full)
|
plt.suptitle('Gaussian likelihood')
|
||||||
|
plt.subplot(131)
|
||||||
|
m.plot()
|
||||||
|
plt.plot(X_full, Y_full)
|
||||||
print m
|
print m
|
||||||
|
|
||||||
plt.suptitle('Student-t likelihood')
|
|
||||||
edited_real_sd = initial_var_guess #real_sd
|
edited_real_sd = initial_var_guess #real_sd
|
||||||
|
|
||||||
print "Clean student t, rasm"
|
print "Clean student t, rasm"
|
||||||
|
|
@ -91,10 +92,12 @@ def debug_student_t_noise_approx():
|
||||||
m.update_likelihood_approximation()
|
m.update_likelihood_approximation()
|
||||||
m.optimize()
|
m.optimize()
|
||||||
print(m)
|
print(m)
|
||||||
plt.subplot(132)
|
if plot:
|
||||||
m.plot()
|
plt.suptitle('Student-t likelihood')
|
||||||
plt.plot(X_full, Y_full)
|
plt.subplot(132)
|
||||||
plt.ylim(-2.5, 2.5)
|
m.plot()
|
||||||
|
plt.plot(X_full, Y_full)
|
||||||
|
plt.ylim(-2.5, 2.5)
|
||||||
|
|
||||||
print "Clean student t, ncg"
|
print "Clean student t, ncg"
|
||||||
t_distribution = GPy.likelihoods.likelihood_functions.student_t(deg_free, sigma=edited_real_sd)
|
t_distribution = GPy.likelihoods.likelihood_functions.student_t(deg_free, sigma=edited_real_sd)
|
||||||
|
|
@ -104,12 +107,13 @@ def debug_student_t_noise_approx():
|
||||||
m.update_likelihood_approximation()
|
m.update_likelihood_approximation()
|
||||||
m.optimize()
|
m.optimize()
|
||||||
print(m)
|
print(m)
|
||||||
plt.subplot(133)
|
if plot:
|
||||||
m.plot()
|
plt.subplot(133)
|
||||||
plt.plot(X_full, Y_full)
|
m.plot()
|
||||||
plt.ylim(-2.5, 2.5)
|
plt.plot(X_full, Y_full)
|
||||||
|
plt.ylim(-2.5, 2.5)
|
||||||
|
|
||||||
plt.show()
|
#plt.show()
|
||||||
|
|
||||||
def student_t_approx():
|
def student_t_approx():
|
||||||
"""
|
"""
|
||||||
|
|
|
||||||
|
|
@ -5,8 +5,8 @@ from scipy.linalg import inv, cho_solve, det
|
||||||
from numpy.linalg import cond
|
from numpy.linalg import cond
|
||||||
from likelihood import likelihood
|
from likelihood import likelihood
|
||||||
from ..util.linalg import pdinv, mdot, jitchol, chol_inv, det_ln_diag, pddet
|
from ..util.linalg import pdinv, mdot, jitchol, chol_inv, det_ln_diag, pddet
|
||||||
from scipy.linalg.flapack import dtrtrs
|
from scipy.linalg.lapack import dtrtrs
|
||||||
import pylab as plt
|
#import pylab as plt
|
||||||
|
|
||||||
|
|
||||||
class Laplace(likelihood):
|
class Laplace(likelihood):
|
||||||
|
|
@ -79,9 +79,9 @@ class Laplace(likelihood):
|
||||||
return (self._Kgradients(dL_d_K_Sigma, dK_dthetaK), self._gradients(dL_d_K_Sigma))
|
return (self._Kgradients(dL_d_K_Sigma, dK_dthetaK), self._gradients(dL_d_K_Sigma))
|
||||||
|
|
||||||
def _shared_gradients_components(self):
|
def _shared_gradients_components(self):
|
||||||
dL_dytil = -np.dot(self.Y.T, (self.K+self.Sigma_tilde)) #or *0.5?
|
dL_dytil = -np.dot(self.Y.T, (self.K+self.Sigma_tilde)) #or *0.5? Shouldn't this be -y*R
|
||||||
dytil_dfhat = self.Wi__Ki_W # np.dot(self.Sigma_tilde, self.Ki) + np.eye(self.N) # or self.Wi__Ki_W?
|
dytil_dfhat = self.Wi__Ki_W # np.dot(self.Sigma_tilde, self.Ki) + np.eye(self.N) # or self.Wi__Ki_W?
|
||||||
#Ki = inv(self.K)
|
#Ki, _, _, _ = pdinv(self.K)
|
||||||
#dytil_dfhat = np.dot(self.Sigma_tilde, Ki) + np.eye(self.N) # or self.Wi__Ki_W?
|
#dytil_dfhat = np.dot(self.Sigma_tilde, Ki) + np.eye(self.N) # or self.Wi__Ki_W?
|
||||||
return dL_dytil, dytil_dfhat
|
return dL_dytil, dytil_dfhat
|
||||||
|
|
||||||
|
|
@ -95,9 +95,8 @@ class Laplace(likelihood):
|
||||||
"""
|
"""
|
||||||
dL_dytil, dytil_dfhat = self._shared_gradients_components()
|
dL_dytil, dytil_dfhat = self._shared_gradients_components()
|
||||||
|
|
||||||
d3phi_d3fhat = self.likelihood_function.d3link(self.data, self.f_hat, self.extra_data)
|
|
||||||
|
|
||||||
dSigma_dfhat = -np.dot(self.Sigma_tilde, np.dot(d3phi_d3fhat, self.Sigma_tilde))
|
#dSigma_dfhat = -np.dot(self.Sigma_tilde, np.dot(d3phi_d3fhat, self.Sigma_tilde))
|
||||||
|
|
||||||
print "Computing K gradients"
|
print "Computing K gradients"
|
||||||
print "dytil_dfhat: ", np.mean(dytil_dfhat)
|
print "dytil_dfhat: ", np.mean(dytil_dfhat)
|
||||||
|
|
@ -107,7 +106,8 @@ class Laplace(likelihood):
|
||||||
#plt.imshow(A)
|
#plt.imshow(A)
|
||||||
#plt.show()
|
#plt.show()
|
||||||
|
|
||||||
I_KW_i, _, _, _ = pdinv(A) #FIXME: WHY SO MUCH JITTER?!
|
#I_KW_i, _, _, _ = pdinv(A) #FIXME: WHY SO MUCH JITTER?!
|
||||||
|
I_KW_i = self.Bi # could use self.B_chol??
|
||||||
|
|
||||||
#FIXME: Careful dK_dthetaK is not the derivative with respect to the marginal just prior K!
|
#FIXME: Careful dK_dthetaK is not the derivative with respect to the marginal just prior K!
|
||||||
#Derivative for each f dimension, for each of K's hyper parameters
|
#Derivative for each f dimension, for each of K's hyper parameters
|
||||||
|
|
@ -117,25 +117,44 @@ class Laplace(likelihood):
|
||||||
dfhat_dthetaK[:, ind_j] = np.dot(I_KW_i, np.dot(thetaj, grad))
|
dfhat_dthetaK[:, ind_j] = np.dot(I_KW_i, np.dot(thetaj, grad))
|
||||||
|
|
||||||
dytil_dthetaK = np.dot(dytil_dfhat, dfhat_dthetaK) # should be (D,thetaK)
|
dytil_dthetaK = np.dot(dytil_dfhat, dfhat_dthetaK) # should be (D,thetaK)
|
||||||
#FIXME: Careful dL_dK = dL_d_K_Sigma
|
|
||||||
#FIXME: Careful the -D*0.5 in dL_d_K_sigma might need to be -0.5?
|
#FIXME: Careful the -D*0.5 in dL_d_K_sigma might need to be -0.5?
|
||||||
dL_dSigma = dL_d_K_Sigma
|
dL_dSigma = dL_d_K_Sigma
|
||||||
#d3phi_d3fhat = self.likelihood_function.d3link(self.data, self.f_hat, self.extra_data)
|
#d3phi_d3fhat = self.likelihood_function.d3link(self.data, self.f_hat, self.extra_data)
|
||||||
#explicit #implicit
|
#explicit #implicit
|
||||||
dSigmai_dthetaK = 0 + np.dot(d3phi_d3fhat, dfhat_dthetaK)
|
#dSigmai_dthetaK = 0 + np.dot(d3phi_d3fhat, dfhat_dthetaK)
|
||||||
dSigma_dthetaK = np.zeros((self.f_hat.shape[0], self.f_hat.shape[0], dK_dthetaK.shape[0]))
|
#dSigma_dthetaK = np.zeros((self.f_hat.shape[0], self.f_hat.shape[0], dK_dthetaK.shape[0]))
|
||||||
for ind_j, dSigmai_dthetaj in enumerate(dSigmai_dthetaK):
|
d3likelihood_d3fhat = self.likelihood_function.d3link(self.data, self.f_hat, self.extra_data)
|
||||||
dSigma_dthetaK[:, :, ind_j] = -np.dot(self.Sigma_tilde, dSigmai_dthetaj*self.Sigma_tilde)
|
Wi = np.diagonal(self.Sigma_tilde) #Convenience
|
||||||
|
dSigma_dthetaK_explicit = 0
|
||||||
print "dL_dytil: ", np.mean(dL_dytil)
|
#Can just hadamard product as diagonal matricies multiplied are just multiplying elements
|
||||||
print "dytil_dthetaK: ", np.mean(dytil_dthetaK)
|
dWi_dfhat = np.diagflat(-1*Wi*(-1*d3likelihood_d3fhat)*Wi)
|
||||||
|
#dSigma_dthetaK_implicit = -np.sum(np.dot(dWi_dfhat, dfhat_dthetaK), axis=0)
|
||||||
|
dSigma_dthetaK_implicit = np.dot(dWi_dfhat, dfhat_dthetaK)
|
||||||
|
dSigma_dthetaK = dSigma_dthetaK_explicit + dSigma_dthetaK_implicit
|
||||||
|
#dSigma_dthetaK = 0 + np.dot(, dfhat_dthetaK)
|
||||||
|
#for ind_j, dSigmai_dthetaj in enumerate(dSigmai_dthetaK):
|
||||||
|
#dSigma_dthetaK_explicit = 0
|
||||||
|
#dSigma_dthetaK_implicit = -np.dot(Wi, dW_dfhat
|
||||||
|
#dSigma_dthetaK[:, :, ind_j] = -np.dot(self.Sigma_tilde, dSigmai_dthetaj*self.Sigma_tilde)
|
||||||
|
|
||||||
#FIXME: Won't handle multi dimensional data
|
#FIXME: Won't handle multi dimensional data
|
||||||
dL_dthetaK_via_ytil = np.sum(np.dot(dL_dytil, dytil_dthetaK), axis=0)
|
dL_dthetaK_via_ytil = np.sum(np.dot(dL_dytil, dytil_dthetaK), axis=0)
|
||||||
dL_dthetaK_via_Sigma = np.sum(np.dot(dL_dSigma, dSigma_dthetaK), axis=(0,1))
|
dL_dthetaK_via_Sigma = np.sum(np.dot(dL_dSigma, dSigma_dthetaK), axis=0)
|
||||||
dL_dthetaK_implicit = dL_dthetaK_via_ytil + dL_dthetaK_via_Sigma
|
dL_dthetaK_implicit = dL_dthetaK_via_ytil + dL_dthetaK_via_Sigma
|
||||||
#dL_dthetaK_implicit = np.dot(dL_dytil.T, dytil_dthetaK.T)
|
#dL_dthetaK_implicit = np.dot(dL_dytil.T, dytil_dthetaK.T)
|
||||||
import ipdb; ipdb.set_trace() # XXX BREAKPOINT
|
|
||||||
|
#print "\n"
|
||||||
|
#print "dL_dytil: ", np.mean(dL_dytil)
|
||||||
|
#print "dytil_dthetaK: ", np.mean(dytil_dthetaK)
|
||||||
|
#print "dL_dthetaK_via_ytil: ", dL_dthetaK_via_ytil
|
||||||
|
#print "\n"
|
||||||
|
#print "dL_dSigma: ", np.mean(dL_dSigma)
|
||||||
|
#print "dSigma_dthetaK: ", np.mean(dSigma_dthetaK)
|
||||||
|
#print "dL_dthetaK_via_Sigma: ", dL_dthetaK_via_Sigma
|
||||||
|
#print "\n"
|
||||||
|
#print "dL_dthetaK_implicit: ", dL_dthetaK_implicit
|
||||||
|
#import ipdb; ipdb.set_trace() # XXX BREAKPOINT
|
||||||
|
|
||||||
return np.squeeze(dL_dthetaK_implicit)
|
return np.squeeze(dL_dthetaK_implicit)
|
||||||
|
|
||||||
def _gradients(self, partial):
|
def _gradients(self, partial):
|
||||||
|
|
@ -159,27 +178,51 @@ class Laplace(likelihood):
|
||||||
dW_dthetaX = d_dthetaX[d2phi_d2fhat]
|
dW_dthetaX = d_dthetaX[d2phi_d2fhat]
|
||||||
d2phi_d2fhat = Hessian function of likelihood
|
d2phi_d2fhat = Hessian function of likelihood
|
||||||
|
|
||||||
partial = dL_dK
|
partial = dL_d_K_Sigma
|
||||||
"""
|
"""
|
||||||
dL_dytil, dytil_dfhat = self._shared_gradients_components()
|
dL_dytil, dytil_dfhat = self._shared_gradients_components()
|
||||||
dfhat_dthetaL, dSigmai_dthetaL = self.likelihood_function._gradients(self.data, self.f_hat, self.extra_data) #FIXME: Shouldn't this have a implicit component aswell?
|
#dfhat_dthetaL, dSigmai_dthetaL = self.likelihood_function._gradients(self.data, self.f_hat, self.extra_data) #FIXME: Shouldn't this have a implicit component aswell?
|
||||||
|
|
||||||
|
dlikelihood_dthetaL_explicit, d2likelihood_dthetaL = self.likelihood_function._gradients(self.data, self.f_hat, self.extra_data) #FIXME: Shouldn't this have a implicit component aswell?
|
||||||
|
dlikelihood_dfhat = self.likelihood_function.link_hess(self.data, self.f_hat, self.extra_data)
|
||||||
|
dfhat_dthetaL_cyclic = 0 #what is this? how can dfhat_dthetaL be used in the value of itself?
|
||||||
|
dlikelihood_dthetaL_implicit = np.dot(dlikelihood_dfhat, dfhat_dthetaL_cyclic) # may need a sum over f
|
||||||
|
dfhat_dthetaL = np.dot(self.K, (dlikelihood_dthetaL_explicit + dlikelihood_dthetaL_implicit)[:, None])
|
||||||
|
dytil_dthetaL = np.dot(dytil_dfhat, dfhat_dthetaL)
|
||||||
|
|
||||||
|
#FIXME: Careful the -D*0.5 in dL_d_K_sigma might need to be -0.5?
|
||||||
|
dL_dSigma = partial #Is actually but can't rename it because of naming convention... dL_d_K_Sigma
|
||||||
|
|
||||||
|
Wi = np.diagonal(self.Sigma_tilde) #Convenience
|
||||||
|
#-1 as we are looking at W which is -1*d2log p(y|f)
|
||||||
|
#Can just hadamard product as diagonal matricies multiplied are just multiplying elements
|
||||||
|
dSigma_dthetaL_explicit = np.diagflat(-(Wi*(-1*d2likelihood_dthetaL)*Wi))
|
||||||
|
|
||||||
|
d3likelihood_d3fhat = self.likelihood_function.d3link(self.data, self.f_hat, self.extra_data)
|
||||||
|
dWi_dfhat = np.diagflat(-1*Wi*(-1*d3likelihood_d3fhat)*Wi)
|
||||||
|
dSigma_dthetaL_implicit = np.dot(dWi_dfhat, dfhat_dthetaL_cyclic)
|
||||||
|
dSigma_dthetaL = dSigma_dthetaL_explicit + dSigma_dthetaL_implicit
|
||||||
|
|
||||||
#dSigmai_dthetaL = self.likelihood_function._gradients(self.data, self.f_hat, self.extra_data) #FIXME: Shouldn't this have a implicit component aswell?
|
#dSigmai_dthetaL = self.likelihood_function._gradients(self.data, self.f_hat, self.extra_data) #FIXME: Shouldn't this have a implicit component aswell?
|
||||||
#Derivative for each f dimension, for each of K's hyper parameters
|
#Derivative for each f dimension, for each of K's hyper parameters
|
||||||
dSigma_dthetaL = np.empty((self.N, len(self.likelihood_function._get_param_names())))
|
#dSigma_dthetaL = np.empty((self.N, len(self.likelihood_function._get_param_names())))
|
||||||
for ind_l, dSigmai_dtheta_l in enumerate(dSigmai_dthetaL.T):
|
#for ind_l, dSigmai_dtheta_l in enumerate(dSigmai_dthetaL.T):
|
||||||
dSigma_dthetaL[:, ind_l] = -mdot(self.Sigma_tilde,
|
#dSigma_dthetaL[:, ind_l] = -mdot(self.Sigma_tilde,
|
||||||
dSigmai_dtheta_l, # Careful, shouldn't this be (N, 1)?
|
#dSigmai_dtheta_l, # Careful, shouldn't this be (N, 1)?
|
||||||
self.Sigma_tilde
|
#self.Sigma_tilde
|
||||||
)
|
#)
|
||||||
|
|
||||||
#TODO: This is Wi*A*Wi, can be more numerically stable with a trick
|
#TODO: This is Wi*A*Wi, can be more numerically stable with a trick
|
||||||
#dSigma_dthetaL = -mdot(self.Sigma_tilde, dSigmai_dthetaL, self.Sigma_tilde)
|
#dSigma_dthetaL = -mdot(self.Sigma_tilde, dSigmai_dthetaL, self.Sigma_tilde)
|
||||||
dL_dSigma = partial # partial is dL_dK but K here is K+Sigma_tilde.... which is fine in this case
|
|
||||||
|
|
||||||
#dytil_dthetaL = dytil_dfhat*dfhat_dthetaL
|
#dytil_dthetaL = dytil_dfhat*dfhat_dthetaL
|
||||||
dytil_dthetaL = np.dot(dytil_dfhat, dfhat_dthetaL)
|
#dytil_dthetaL = np.dot(dytil_dfhat, dfhat_dthetaL)
|
||||||
dL_dthetaL = 0 + np.dot(dL_dytil, dytil_dthetaL)# + np.dot(dL_dSigma, dSigma_dthetaL)
|
#dL_dthetaL = 0 + np.dot(dL_dytil, dytil_dthetaL)# + np.dot(dL_dSigma, dSigma_dthetaL)
|
||||||
|
|
||||||
|
dL_dthetaL_via_ytil = np.sum(np.dot(dL_dytil, dytil_dthetaL), axis=0)
|
||||||
|
dL_dthetaL_via_Sigma = np.sum(np.dot(dL_dSigma, dSigma_dthetaL), axis=0)
|
||||||
|
dL_dthetaL = dL_dthetaL_via_ytil + dL_dthetaL_via_Sigma
|
||||||
|
|
||||||
return np.squeeze(dL_dthetaL) #should be array of length *params-being optimized*, for student t just optimising 1 parameter, this is (1,)
|
return np.squeeze(dL_dthetaL) #should be array of length *params-being optimized*, for student t just optimising 1 parameter, this is (1,)
|
||||||
|
|
||||||
def _compute_GP_variables(self):
|
def _compute_GP_variables(self):
|
||||||
|
|
|
||||||
|
|
@ -248,17 +248,16 @@ class student_t(likelihood_function):
|
||||||
"""
|
"""
|
||||||
Third order derivative link_function (log-likelihood ) at y given f f_j w.r.t f and f_j
|
Third order derivative link_function (log-likelihood ) at y given f f_j w.r.t f and f_j
|
||||||
|
|
||||||
$$\frac{-2(v+1)((f-y)^{3} - 3\sigma^{2}v(f-y))}{((f-y)^{2} + \sigma^{2}v)^{3}}$$
|
$$\frac{2(v+1)((y-f)^{3} - 3\sigma^{2}v(y-f))}{((y-f)^{2} + \sigma^{2}v)^{3}}$$
|
||||||
"""
|
"""
|
||||||
y = np.squeeze(y)
|
y = np.squeeze(y)
|
||||||
f = np.squeeze(f)
|
f = np.squeeze(f)
|
||||||
assert y.shape == f.shape
|
assert y.shape == f.shape
|
||||||
#NB f-y not y-f
|
e = y - f
|
||||||
e = f - y
|
d3link_d3f = ( (2*(self.v + 1)*(e**3 - 3*(self.sigma**2)*self.v*e))
|
||||||
d3link_d3f = ( (-2*(self.v + 1)*(e**3 - 3*(self.sigma**2)*self.v*e))
|
|
||||||
/ ((e**2 + (self.sigma**2)*self.v)**3)
|
/ ((e**2 + (self.sigma**2)*self.v)**3)
|
||||||
)
|
)
|
||||||
return d3link_d3f
|
return np.squeeze(d3link_d3f)
|
||||||
|
|
||||||
def link_hess_grad_std(self, y, f, extra_data=None):
|
def link_hess_grad_std(self, y, f, extra_data=None):
|
||||||
"""
|
"""
|
||||||
|
|
@ -270,10 +269,10 @@ class student_t(likelihood_function):
|
||||||
f = np.squeeze(f)
|
f = np.squeeze(f)
|
||||||
assert y.shape == f.shape
|
assert y.shape == f.shape
|
||||||
e = y - f
|
e = y - f
|
||||||
hess_grad_sigma = ( (2*self.sigma*(self.v + 1)*((self.sigma**2)*self.v - 3*(e**2)))
|
hess_grad_sigma = ( (2*self.sigma*self.v*(self.v + 1)*((self.sigma**2)*self.v - 3*(e**2)))
|
||||||
/ ((e**2 + (self.sigma**2)*self.v)**3)
|
/ ((e**2 + (self.sigma**2)*self.v)**3)
|
||||||
)
|
)
|
||||||
return hess_grad_sigma
|
return np.squeeze(hess_grad_sigma)
|
||||||
|
|
||||||
def link_grad_std(self, y, f, extra_data=None):
|
def link_grad_std(self, y, f, extra_data=None):
|
||||||
"""
|
"""
|
||||||
|
|
@ -288,11 +287,11 @@ class student_t(likelihood_function):
|
||||||
grad_sigma = ( (-2*self.sigma*self.v*(self.v + 1)*e)
|
grad_sigma = ( (-2*self.sigma*self.v*(self.v + 1)*e)
|
||||||
/ ((self.v*(self.sigma**2) + e**2)**2)
|
/ ((self.v*(self.sigma**2) + e**2)**2)
|
||||||
)
|
)
|
||||||
return grad_sigma
|
return np.squeeze(grad_sigma)
|
||||||
|
|
||||||
def _gradients(self, y, f, extra_data=None):
|
def _gradients(self, y, f, extra_data=None):
|
||||||
return [self.link_grad_std(y, f, extra_data=extra_data)[:, None],
|
return [self.link_grad_std(y, f, extra_data=extra_data),
|
||||||
self.link_hess_grad_std(y, f, extra_data=extra_data)[:, None]] # list as we might learn many parameters
|
self.link_hess_grad_std(y, f, extra_data=extra_data)] # list as we might learn many parameters
|
||||||
|
|
||||||
def predictive_values(self, mu, var):
|
def predictive_values(self, mu, var):
|
||||||
"""
|
"""
|
||||||
|
|
|
||||||
|
|
@ -125,19 +125,23 @@ class GP(model):
|
||||||
if isinstance(self.likelihood, Laplace):
|
if isinstance(self.likelihood, Laplace):
|
||||||
dL_dthetaK_explicit = dL_dthetaK
|
dL_dthetaK_explicit = dL_dthetaK
|
||||||
#Need to pass in a matrix of ones to get access to raw dK_dthetaK values without being chained
|
#Need to pass in a matrix of ones to get access to raw dK_dthetaK values without being chained
|
||||||
fake_dL_dKs = np.ones(self.dL_dK.shape)
|
fake_dL_dKs = np.eye(self.dL_dK.shape[0])
|
||||||
dK_dthetaK = self.kern.dK_dtheta(dL_dK=fake_dL_dKs, X=self.X)
|
dK_dthetaK = self.kern.dK_dtheta(dL_dK=fake_dL_dKs, X=self.X)
|
||||||
|
|
||||||
dL_dthetaK_implicit = self.likelihood._Kgradients(self.dL_dK, dK_dthetaK)
|
#We need the dL_dK where K is equal to the prior K, not K+Sigma as is the case now
|
||||||
|
dL_dthetaK_implicit = self.likelihood._Kgradients(dL_d_K_Sigma=self.dL_dK, dK_dthetaK=dK_dthetaK)
|
||||||
dL_dthetaK = dL_dthetaK_explicit + dL_dthetaK_implicit
|
dL_dthetaK = dL_dthetaK_explicit + dL_dthetaK_implicit
|
||||||
|
|
||||||
print "dL_dthetaK_explicit: {dldkx} dL_dthetaK_implicit: {dldki} dL_dthetaK: {dldk}".format(dldkx=dL_dthetaK_explicit, dldki=dL_dthetaK_implicit, dldk=dL_dthetaK)
|
#print "dL_dthetaK_explicit: {dldkx} dL_dthetaK_implicit: {dldki} dL_dthetaK: {dldk}".format(dldkx=dL_dthetaK_explicit, dldki=dL_dthetaK_implicit, dldk=dL_dthetaK)
|
||||||
|
|
||||||
dL_dthetaL = self.likelihood._gradients(partial=self.dL_dK)
|
|
||||||
else:
|
|
||||||
print "dL_dthetaK: ", dL_dthetaK
|
|
||||||
dL_dthetaL = self.likelihood._gradients(partial=np.diag(self.dL_dK))
|
dL_dthetaL = self.likelihood._gradients(partial=np.diag(self.dL_dK))
|
||||||
print "dL_dthetaL: ", dL_dthetaL
|
print "Stacked dL_dthetaK, dL_dthetaL: ", np.hstack((dL_dthetaK, dL_dthetaL))
|
||||||
|
import ipdb; ipdb.set_trace() ### XXX BREAKPOINT
|
||||||
|
else:
|
||||||
|
#print "dL_dthetaK: ", dL_dthetaK
|
||||||
|
dL_dthetaL = self.likelihood._gradients(partial=np.diag(self.dL_dK))
|
||||||
|
print "Stacked dL_dthetaK, dL_dthetaL: ", np.hstack((dL_dthetaK, dL_dthetaL))
|
||||||
|
#print "dL_dthetaL: ", dL_dthetaL
|
||||||
return np.hstack((dL_dthetaK, dL_dthetaL))
|
return np.hstack((dL_dthetaK, dL_dthetaL))
|
||||||
#return np.hstack((self.kern.dK_dtheta(dL_dK=self.dL_dK, X=self.X), self.likelihood._gradients(partial=np.diag(self.dL_dK))))
|
#return np.hstack((self.kern.dK_dtheta(dL_dK=self.dL_dK, X=self.X), self.likelihood._gradients(partial=np.diag(self.dL_dK))))
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue