mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-05-18 13:55:14 +02:00
Trying to fix dL_dytil gradient
This commit is contained in:
parent
48d693791e
commit
146d7e2458
2 changed files with 84 additions and 48 deletions
|
|
@ -79,16 +79,29 @@ class Laplace(likelihood):
|
|||
return (self._Kgradients(dL_d_K_Sigma, dK_dthetaK), self._gradients(dL_d_K_Sigma))
|
||||
|
||||
def _shared_gradients_components(self):
|
||||
dL_dytil = -np.dot(self.Y.T, inv(self.K+self.Sigma_tilde)) #or *0.5? Shouldn't this be -y*R
|
||||
Ki, _, _, _ = pdinv(self.K)
|
||||
|
||||
#Y__KS_i = np.dot(self.Y.T, inv(self.K+self.Sigma_tilde))
|
||||
#dL_dytil = -0.5*Y__KS_i #or *0.5? Shouldn't this be -y*R
|
||||
#dL_dytil = -0.5*np.trace(np.dot(inv(self.K+self.Sigma_tilde), (np.dot(self.Y, self.Y.T) + self.Y.T)))
|
||||
#dL_dytil_simple_term = -0.5*np.dot(inv(self.K+self.Sigma_tilde),
|
||||
#dL_dytil_simple_term = -np.dot(self.Y.T, inv(self.K+self.Sigma_tilde), self.Y)
|
||||
c = inv(self.K+self.Sigma_tilde)
|
||||
dL_dytil_simple_term = -0.5*np.diag(np.dot(c, self.Y) + np.dot(self.Y.T, c))
|
||||
|
||||
P = np.diagflat(1/np.dot(Ki, self.f_hat))
|
||||
K_Wi_i = inv(self.K+self.Sigma_tilde)
|
||||
|
||||
dL_dytil_difficult_term = np.diag(( -0.5*(np.dot(self.K + self.Sigma_tilde, P))
|
||||
+0.5*mdot(K_Wi_i, self.Y, self.Y.T, K_Wi_i, P)
|
||||
) * np.eye(self.N))
|
||||
dL_dytil = dL_dytil_simple_term + dL_dytil_difficult_term
|
||||
|
||||
d3likelihood_d3fhat = self.likelihood_function.d3link(self.data, self.f_hat, self.extra_data)
|
||||
Wi = np.diagonal(self.Sigma_tilde) #Convenience
|
||||
#Can just hadamard product as diagonal matricies multiplied are just multiplying elements
|
||||
dWi_dfhat = np.diagflat(-1*Wi*(-1*d3likelihood_d3fhat)*Wi)
|
||||
|
||||
Ki, _, _, _ = pdinv(self.K)
|
||||
#dytil_dfhat_implicit = np.dot(dWi_dfhat, Ki) + np.eye(self.N)
|
||||
#dytil_dfhat = np.dot(dWi_dfhat, Ki) + np.eye(self.N)
|
||||
|
||||
#Wi(Ki + W) = Wi__Ki_W using the last K prior given to fit_full
|
||||
#dytil_dfhat_explicit = self.Wi__Ki_W
|
||||
|
|
@ -97,6 +110,8 @@ class Laplace(likelihood):
|
|||
|
||||
dytil_dfhat = - np.diagflat(np.dot(dWi_dfhat, np.dot(Ki, self.f_hat))) + np.dot(self.Sigma_tilde, Ki) + np.eye(self.N)
|
||||
self.dytil_dfhat = dytil_dfhat
|
||||
#dytil_dfhat = np.eye(dytil_dfhat.shape[0])
|
||||
self.dL_dfhat = np.dot(dL_dytil, dytil_dfhat) #FIXME: Purely for checkgradding....
|
||||
return dL_dytil, dytil_dfhat
|
||||
|
||||
def _Kgradients(self, dL_d_K_Sigma, dK_dthetaK):
|
||||
|
|
|
|||
|
|
@ -1,26 +1,29 @@
|
|||
import unittest
|
||||
import numpy as np
|
||||
np.random.seed(82)
|
||||
|
||||
import GPy
|
||||
from GPy.models import GP
|
||||
from GPy.util.linalg import pdinv, tdot
|
||||
from scipy import linalg
|
||||
|
||||
class LikelihoodGradParam(GP):
|
||||
def __init__(self, X, likelihood_function, kernel, param_name=None, function=None, **kwargs):
|
||||
super(LikelihoodGradParam, self).__init__(X, likelihood_function, kernel)
|
||||
class LikelihoodParamGrad(GP):
|
||||
def __init__(self, X=None, likelihood_function=None, kernel=None, param_name=None, function=None, dparam_name=None, **kwargs):
|
||||
self.param_name = param_name
|
||||
self.dparam_name = dparam_name
|
||||
self.func = function
|
||||
super(LikelihoodParamGrad, self).__init__(X, likelihood_function, kernel)
|
||||
#self.func_params = kwargs
|
||||
#self.parameter = self.likelihood.__getattribute__(self.param_name)
|
||||
|
||||
def _get_param_names(self):
|
||||
f_hats = ["f_{}".format(i) for i in range(len(self.likelihood.f_hat))]
|
||||
return f_hats
|
||||
params = getattr(self.likelihood, self.dparam_name)
|
||||
params_names = ["{}_{}".format(self.dparam_name, i) for i in range(len(params))]
|
||||
return params_names
|
||||
|
||||
def _get_params(self):
|
||||
return np.hstack([np.squeeze(self.likelihood.f_hat)])
|
||||
#return np.hstack([self.likelihood.__getattribute__(self.param_name)])
|
||||
params = getattr(self.likelihood, self.dparam_name)
|
||||
return np.hstack([params])
|
||||
|
||||
def hack_dL_dK(self):
|
||||
self.K = self.kern.K(self.X)
|
||||
|
|
@ -38,29 +41,56 @@ class LikelihoodGradParam(GP):
|
|||
self.dL_dK = 0.5 * (tmp - self.D * self.Ki)
|
||||
|
||||
def _set_params(self, x):
|
||||
self.likelihood.f_hat = x.reshape(self.N, 1)
|
||||
raise NotImplementedError
|
||||
|
||||
def log_likelihood(self):
|
||||
raise NotImplementedError
|
||||
|
||||
def _log_likelihood_gradients(self):
|
||||
raise NotImplementedError
|
||||
|
||||
|
||||
class Likelihood_F_Grad(LikelihoodParamGrad):
|
||||
def __init__(self, **kwargs):
|
||||
super(Likelihood_F_Grad, self).__init__(**kwargs)
|
||||
|
||||
def _set_params(self, x):
|
||||
params = getattr(self.likelihood, self.dparam_name)
|
||||
setattr(self.likelihood, self.dparam_name, x.reshape(*params.shape))
|
||||
self.likelihood._compute_likelihood_variables()
|
||||
self.hack_dL_dK()
|
||||
|
||||
def log_likelihood(self):
|
||||
return self.func(self.likelihood)[0, 0]
|
||||
ll = self.func(self)
|
||||
if self.param_name == "dL_dfhat_":
|
||||
import ipdb; ipdb.set_trace() ### XXX BREAKPOINT
|
||||
if len(ll.shape) == 0 or len(ll.shape) == 1:
|
||||
return ll.sum()
|
||||
elif len(ll.shape) == 2:
|
||||
#print "Only checking first likelihood"
|
||||
return ll[0, 0]
|
||||
else:
|
||||
raise ValueError('Not implemented for larger matricies yet')
|
||||
return ll
|
||||
|
||||
def _log_likelihood_gradients(self):
|
||||
#gradient = self.likelihood.__getattribute__(self.param_name)
|
||||
self.likelihood._compute_likelihood_variables()
|
||||
self.likelihood._gradients(partial=np.diag(self.dL_dK))
|
||||
gradient = getattr(self.likelihood, self.param_name)
|
||||
#Need to sum over fhats? For dytil_dfhat...
|
||||
#gradient = np.flatten(gradient, axis=0)
|
||||
#return gradient[:, 0]
|
||||
return gradient[0, :]
|
||||
if len(gradient.shape) == 1:
|
||||
return gradient
|
||||
elif len(gradient.shape) == 2:
|
||||
#print "Only checking first gradients"
|
||||
return gradient[0,: ]
|
||||
else:
|
||||
raise ValueError('Not implemented for larger matricies yet')
|
||||
|
||||
|
||||
class LaplaceTests(unittest.TestCase):
|
||||
def setUp(self):
|
||||
real_var = 0.1
|
||||
#Start a function, any function
|
||||
self.X = np.linspace(0.0, 10.0, 30)[:, None]
|
||||
self.X = np.linspace(0.0, 10.0, 4)[:, None]
|
||||
#self.X = np.random.randn(,1)
|
||||
#self.X = np.ones((10,1))
|
||||
Y = np.sin(self.X) + np.random.randn(*self.X.shape)*real_var
|
||||
|
|
@ -74,49 +104,40 @@ class LaplaceTests(unittest.TestCase):
|
|||
t_distribution = GPy.likelihoods.likelihood_functions.student_t(deg_free, sigma=initial_sd_guess)
|
||||
self.stu_t_likelihood = GPy.likelihoods.Laplace(Y.copy(), t_distribution, rasm=True)
|
||||
self.stu_t_likelihood.fit_full(self.kernel.K(self.X))
|
||||
self.m = LikelihoodGradParam(self.X, self.stu_t_likelihood, self.kernel, None, None)
|
||||
self.m.constrain_fixed('rbf_v', 1.0898)
|
||||
self.m.constrain_fixed('rbf_l', 1.8651)
|
||||
|
||||
def tearDown(self):
|
||||
self.m = None
|
||||
|
||||
def test_dy_dfhat(self):
|
||||
def ytil(likelihood):
|
||||
Sigma_tilde = likelihood.Sigma_tilde
|
||||
K = likelihood.K
|
||||
def ytil(self):
|
||||
Sigma_tilde = self.likelihood.Sigma_tilde
|
||||
K = self.likelihood.K
|
||||
Ki, _, _, _ = pdinv(K)
|
||||
f_hat = likelihood.f_hat
|
||||
f_hat = self.likelihood.f_hat
|
||||
Sigma, _, _, _ = pdinv(Sigma_tilde)
|
||||
return np.dot(np.dot(Sigma_tilde, (Ki + Sigma)), f_hat)
|
||||
|
||||
self.m.func = ytil
|
||||
self.m.param_name = 'dytil_dfhat'
|
||||
self.m = Likelihood_F_Grad(X=self.X, likelihood_function=self.stu_t_likelihood,
|
||||
kernel=self.kernel, param_name='dytil_dfhat',
|
||||
function=ytil, dparam_name='f_hat')
|
||||
#self.m.constrain_fixed('rbf_v', 1.0898)
|
||||
#self.m.constrain_fixed('rbf_l', 1.8651)
|
||||
self.m.randomize()
|
||||
#try:
|
||||
self.m.checkgrad(verbose=1)
|
||||
assert self.m.checkgrad()
|
||||
#except:
|
||||
#import ipdb;ipdb.set_trace()
|
||||
|
||||
def test_dL_dfhat(self):
|
||||
def L(self):
|
||||
return np.array(-0.5 * self.D * self.K_logdet + self._model_fit_term() + self.likelihood.Z)
|
||||
|
||||
#def test_dL_dytil(self):
|
||||
#def L(likelihood):
|
||||
##-0.5 * self.D * self.K_logdet + self._model_fit_term() + self.likelihood.Z
|
||||
#Sigma_tilde = likelihood.Sigma_tilde
|
||||
#Ki = likelihood.K
|
||||
#f_hat = likelihood.f_hat
|
||||
#Sigma, _, _, _ = pdinv(Sigma_tilde)
|
||||
#return np.dot(np.dot(Sigma_tilde, (Ki + Sigma)), f_hat)
|
||||
|
||||
#self.m.func = L
|
||||
#self.m.param_name = 'dL_dytil'
|
||||
#m.randomize()
|
||||
##try:
|
||||
#m.checkgrad(verbose=1)
|
||||
#assert m.checkgrad()
|
||||
#except:
|
||||
#import ipdb;ipdb.set_trace()
|
||||
self.m = Likelihood_F_Grad(X=self.X, likelihood_function=self.stu_t_likelihood,
|
||||
kernel=self.kernel, param_name='dL_dfhat',
|
||||
function=L, dparam_name='f_hat')
|
||||
self.m.constrain_fixed('rbf_v', 1.0898)
|
||||
self.m.constrain_fixed('rbf_l', 1.8651)
|
||||
self.m.randomize()
|
||||
self.m.checkgrad(verbose=1)
|
||||
assert self.m.checkgrad()
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue