Trying to fix dL_dytil gradient

This commit is contained in:
Alan Saul 2013-05-17 17:42:00 +01:00
parent 48d693791e
commit 146d7e2458
2 changed files with 84 additions and 48 deletions

View file

@ -79,16 +79,29 @@ class Laplace(likelihood):
return (self._Kgradients(dL_d_K_Sigma, dK_dthetaK), self._gradients(dL_d_K_Sigma)) return (self._Kgradients(dL_d_K_Sigma, dK_dthetaK), self._gradients(dL_d_K_Sigma))
def _shared_gradients_components(self): def _shared_gradients_components(self):
dL_dytil = -np.dot(self.Y.T, inv(self.K+self.Sigma_tilde)) #or *0.5? Shouldn't this be -y*R Ki, _, _, _ = pdinv(self.K)
#Y__KS_i = np.dot(self.Y.T, inv(self.K+self.Sigma_tilde))
#dL_dytil = -0.5*Y__KS_i #or *0.5? Shouldn't this be -y*R
#dL_dytil = -0.5*np.trace(np.dot(inv(self.K+self.Sigma_tilde), (np.dot(self.Y, self.Y.T) + self.Y.T)))
#dL_dytil_simple_term = -0.5*np.dot(inv(self.K+self.Sigma_tilde),
#dL_dytil_simple_term = -np.dot(self.Y.T, inv(self.K+self.Sigma_tilde), self.Y)
c = inv(self.K+self.Sigma_tilde)
dL_dytil_simple_term = -0.5*np.diag(np.dot(c, self.Y) + np.dot(self.Y.T, c))
P = np.diagflat(1/np.dot(Ki, self.f_hat))
K_Wi_i = inv(self.K+self.Sigma_tilde)
dL_dytil_difficult_term = np.diag(( -0.5*(np.dot(self.K + self.Sigma_tilde, P))
+0.5*mdot(K_Wi_i, self.Y, self.Y.T, K_Wi_i, P)
) * np.eye(self.N))
dL_dytil = dL_dytil_simple_term + dL_dytil_difficult_term
d3likelihood_d3fhat = self.likelihood_function.d3link(self.data, self.f_hat, self.extra_data) d3likelihood_d3fhat = self.likelihood_function.d3link(self.data, self.f_hat, self.extra_data)
Wi = np.diagonal(self.Sigma_tilde) #Convenience Wi = np.diagonal(self.Sigma_tilde) #Convenience
#Can just hadamard product as diagonal matricies multiplied are just multiplying elements #Can just hadamard product as diagonal matricies multiplied are just multiplying elements
dWi_dfhat = np.diagflat(-1*Wi*(-1*d3likelihood_d3fhat)*Wi) dWi_dfhat = np.diagflat(-1*Wi*(-1*d3likelihood_d3fhat)*Wi)
Ki, _, _, _ = pdinv(self.K)
#dytil_dfhat_implicit = np.dot(dWi_dfhat, Ki) + np.eye(self.N)
#dytil_dfhat = np.dot(dWi_dfhat, Ki) + np.eye(self.N)
#Wi(Ki + W) = Wi__Ki_W using the last K prior given to fit_full #Wi(Ki + W) = Wi__Ki_W using the last K prior given to fit_full
#dytil_dfhat_explicit = self.Wi__Ki_W #dytil_dfhat_explicit = self.Wi__Ki_W
@ -97,6 +110,8 @@ class Laplace(likelihood):
dytil_dfhat = - np.diagflat(np.dot(dWi_dfhat, np.dot(Ki, self.f_hat))) + np.dot(self.Sigma_tilde, Ki) + np.eye(self.N) dytil_dfhat = - np.diagflat(np.dot(dWi_dfhat, np.dot(Ki, self.f_hat))) + np.dot(self.Sigma_tilde, Ki) + np.eye(self.N)
self.dytil_dfhat = dytil_dfhat self.dytil_dfhat = dytil_dfhat
#dytil_dfhat = np.eye(dytil_dfhat.shape[0])
self.dL_dfhat = np.dot(dL_dytil, dytil_dfhat) #FIXME: Purely for checkgradding....
return dL_dytil, dytil_dfhat return dL_dytil, dytil_dfhat
def _Kgradients(self, dL_d_K_Sigma, dK_dthetaK): def _Kgradients(self, dL_d_K_Sigma, dK_dthetaK):

View file

@ -1,26 +1,29 @@
import unittest import unittest
import numpy as np import numpy as np
np.random.seed(82)
import GPy import GPy
from GPy.models import GP from GPy.models import GP
from GPy.util.linalg import pdinv, tdot from GPy.util.linalg import pdinv, tdot
from scipy import linalg from scipy import linalg
class LikelihoodGradParam(GP): class LikelihoodParamGrad(GP):
def __init__(self, X, likelihood_function, kernel, param_name=None, function=None, **kwargs): def __init__(self, X=None, likelihood_function=None, kernel=None, param_name=None, function=None, dparam_name=None, **kwargs):
super(LikelihoodGradParam, self).__init__(X, likelihood_function, kernel)
self.param_name = param_name self.param_name = param_name
self.dparam_name = dparam_name
self.func = function self.func = function
super(LikelihoodParamGrad, self).__init__(X, likelihood_function, kernel)
#self.func_params = kwargs #self.func_params = kwargs
#self.parameter = self.likelihood.__getattribute__(self.param_name) #self.parameter = self.likelihood.__getattribute__(self.param_name)
def _get_param_names(self): def _get_param_names(self):
f_hats = ["f_{}".format(i) for i in range(len(self.likelihood.f_hat))] params = getattr(self.likelihood, self.dparam_name)
return f_hats params_names = ["{}_{}".format(self.dparam_name, i) for i in range(len(params))]
return params_names
def _get_params(self): def _get_params(self):
return np.hstack([np.squeeze(self.likelihood.f_hat)]) params = getattr(self.likelihood, self.dparam_name)
#return np.hstack([self.likelihood.__getattribute__(self.param_name)]) return np.hstack([params])
def hack_dL_dK(self): def hack_dL_dK(self):
self.K = self.kern.K(self.X) self.K = self.kern.K(self.X)
@ -38,29 +41,56 @@ class LikelihoodGradParam(GP):
self.dL_dK = 0.5 * (tmp - self.D * self.Ki) self.dL_dK = 0.5 * (tmp - self.D * self.Ki)
def _set_params(self, x): def _set_params(self, x):
self.likelihood.f_hat = x.reshape(self.N, 1) raise NotImplementedError
def log_likelihood(self):
raise NotImplementedError
def _log_likelihood_gradients(self):
raise NotImplementedError
class Likelihood_F_Grad(LikelihoodParamGrad):
def __init__(self, **kwargs):
super(Likelihood_F_Grad, self).__init__(**kwargs)
def _set_params(self, x):
params = getattr(self.likelihood, self.dparam_name)
setattr(self.likelihood, self.dparam_name, x.reshape(*params.shape))
self.likelihood._compute_likelihood_variables() self.likelihood._compute_likelihood_variables()
self.hack_dL_dK() self.hack_dL_dK()
def log_likelihood(self): def log_likelihood(self):
return self.func(self.likelihood)[0, 0] ll = self.func(self)
if self.param_name == "dL_dfhat_":
import ipdb; ipdb.set_trace() ### XXX BREAKPOINT
if len(ll.shape) == 0 or len(ll.shape) == 1:
return ll.sum()
elif len(ll.shape) == 2:
#print "Only checking first likelihood"
return ll[0, 0]
else:
raise ValueError('Not implemented for larger matricies yet')
return ll
def _log_likelihood_gradients(self): def _log_likelihood_gradients(self):
#gradient = self.likelihood.__getattribute__(self.param_name)
self.likelihood._compute_likelihood_variables() self.likelihood._compute_likelihood_variables()
self.likelihood._gradients(partial=np.diag(self.dL_dK)) self.likelihood._gradients(partial=np.diag(self.dL_dK))
gradient = getattr(self.likelihood, self.param_name) gradient = getattr(self.likelihood, self.param_name)
#Need to sum over fhats? For dytil_dfhat... if len(gradient.shape) == 1:
#gradient = np.flatten(gradient, axis=0) return gradient
#return gradient[:, 0] elif len(gradient.shape) == 2:
#print "Only checking first gradients"
return gradient[0,: ] return gradient[0,: ]
else:
raise ValueError('Not implemented for larger matricies yet')
class LaplaceTests(unittest.TestCase): class LaplaceTests(unittest.TestCase):
def setUp(self): def setUp(self):
real_var = 0.1 real_var = 0.1
#Start a function, any function #Start a function, any function
self.X = np.linspace(0.0, 10.0, 30)[:, None] self.X = np.linspace(0.0, 10.0, 4)[:, None]
#self.X = np.random.randn(,1) #self.X = np.random.randn(,1)
#self.X = np.ones((10,1)) #self.X = np.ones((10,1))
Y = np.sin(self.X) + np.random.randn(*self.X.shape)*real_var Y = np.sin(self.X) + np.random.randn(*self.X.shape)*real_var
@ -74,49 +104,40 @@ class LaplaceTests(unittest.TestCase):
t_distribution = GPy.likelihoods.likelihood_functions.student_t(deg_free, sigma=initial_sd_guess) t_distribution = GPy.likelihoods.likelihood_functions.student_t(deg_free, sigma=initial_sd_guess)
self.stu_t_likelihood = GPy.likelihoods.Laplace(Y.copy(), t_distribution, rasm=True) self.stu_t_likelihood = GPy.likelihoods.Laplace(Y.copy(), t_distribution, rasm=True)
self.stu_t_likelihood.fit_full(self.kernel.K(self.X)) self.stu_t_likelihood.fit_full(self.kernel.K(self.X))
self.m = LikelihoodGradParam(self.X, self.stu_t_likelihood, self.kernel, None, None)
self.m.constrain_fixed('rbf_v', 1.0898)
self.m.constrain_fixed('rbf_l', 1.8651)
def tearDown(self): def tearDown(self):
self.m = None self.m = None
def test_dy_dfhat(self): def test_dy_dfhat(self):
def ytil(likelihood): def ytil(self):
Sigma_tilde = likelihood.Sigma_tilde Sigma_tilde = self.likelihood.Sigma_tilde
K = likelihood.K K = self.likelihood.K
Ki, _, _, _ = pdinv(K) Ki, _, _, _ = pdinv(K)
f_hat = likelihood.f_hat f_hat = self.likelihood.f_hat
Sigma, _, _, _ = pdinv(Sigma_tilde) Sigma, _, _, _ = pdinv(Sigma_tilde)
return np.dot(np.dot(Sigma_tilde, (Ki + Sigma)), f_hat) return np.dot(np.dot(Sigma_tilde, (Ki + Sigma)), f_hat)
self.m.func = ytil self.m = Likelihood_F_Grad(X=self.X, likelihood_function=self.stu_t_likelihood,
self.m.param_name = 'dytil_dfhat' kernel=self.kernel, param_name='dytil_dfhat',
function=ytil, dparam_name='f_hat')
#self.m.constrain_fixed('rbf_v', 1.0898)
#self.m.constrain_fixed('rbf_l', 1.8651)
self.m.randomize() self.m.randomize()
#try:
self.m.checkgrad(verbose=1) self.m.checkgrad(verbose=1)
assert self.m.checkgrad() assert self.m.checkgrad()
#except:
#import ipdb;ipdb.set_trace()
def test_dL_dfhat(self):
def L(self):
return np.array(-0.5 * self.D * self.K_logdet + self._model_fit_term() + self.likelihood.Z)
#def test_dL_dytil(self): self.m = Likelihood_F_Grad(X=self.X, likelihood_function=self.stu_t_likelihood,
#def L(likelihood): kernel=self.kernel, param_name='dL_dfhat',
##-0.5 * self.D * self.K_logdet + self._model_fit_term() + self.likelihood.Z function=L, dparam_name='f_hat')
#Sigma_tilde = likelihood.Sigma_tilde self.m.constrain_fixed('rbf_v', 1.0898)
#Ki = likelihood.K self.m.constrain_fixed('rbf_l', 1.8651)
#f_hat = likelihood.f_hat self.m.randomize()
#Sigma, _, _, _ = pdinv(Sigma_tilde) self.m.checkgrad(verbose=1)
#return np.dot(np.dot(Sigma_tilde, (Ki + Sigma)), f_hat) assert self.m.checkgrad()
#self.m.func = L
#self.m.param_name = 'dL_dytil'
#m.randomize()
##try:
#m.checkgrad(verbose=1)
#assert m.checkgrad()
#except:
#import ipdb;ipdb.set_trace()
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()