From 146d7e2458cbfc69f8303b0b413e50cebf7fd7f7 Mon Sep 17 00:00:00 2001 From: Alan Saul Date: Fri, 17 May 2013 17:42:00 +0100 Subject: [PATCH] Trying to fix dL_dytil gradient --- GPy/likelihoods/Laplace.py | 23 +++++- GPy/testing/laplace_approx_tests.py | 109 +++++++++++++++++----------- 2 files changed, 84 insertions(+), 48 deletions(-) diff --git a/GPy/likelihoods/Laplace.py b/GPy/likelihoods/Laplace.py index b0dde03f..af20d36a 100644 --- a/GPy/likelihoods/Laplace.py +++ b/GPy/likelihoods/Laplace.py @@ -79,16 +79,29 @@ class Laplace(likelihood): return (self._Kgradients(dL_d_K_Sigma, dK_dthetaK), self._gradients(dL_d_K_Sigma)) def _shared_gradients_components(self): - dL_dytil = -np.dot(self.Y.T, inv(self.K+self.Sigma_tilde)) #or *0.5? Shouldn't this be -y*R + Ki, _, _, _ = pdinv(self.K) + + #Y__KS_i = np.dot(self.Y.T, inv(self.K+self.Sigma_tilde)) + #dL_dytil = -0.5*Y__KS_i #or *0.5? Shouldn't this be -y*R + #dL_dytil = -0.5*np.trace(np.dot(inv(self.K+self.Sigma_tilde), (np.dot(self.Y, self.Y.T) + self.Y.T))) + #dL_dytil_simple_term = -0.5*np.dot(inv(self.K+self.Sigma_tilde), + #dL_dytil_simple_term = -np.dot(self.Y.T, inv(self.K+self.Sigma_tilde), self.Y) + c = inv(self.K+self.Sigma_tilde) + dL_dytil_simple_term = -0.5*np.diag(np.dot(c, self.Y) + np.dot(self.Y.T, c)) + + P = np.diagflat(1/np.dot(Ki, self.f_hat)) + K_Wi_i = inv(self.K+self.Sigma_tilde) + + dL_dytil_difficult_term = np.diag(( -0.5*(np.dot(self.K + self.Sigma_tilde, P)) + +0.5*mdot(K_Wi_i, self.Y, self.Y.T, K_Wi_i, P) + ) * np.eye(self.N)) + dL_dytil = dL_dytil_simple_term + dL_dytil_difficult_term d3likelihood_d3fhat = self.likelihood_function.d3link(self.data, self.f_hat, self.extra_data) Wi = np.diagonal(self.Sigma_tilde) #Convenience #Can just hadamard product as diagonal matricies multiplied are just multiplying elements dWi_dfhat = np.diagflat(-1*Wi*(-1*d3likelihood_d3fhat)*Wi) - Ki, _, _, _ = pdinv(self.K) - #dytil_dfhat_implicit = np.dot(dWi_dfhat, Ki) + np.eye(self.N) - #dytil_dfhat = np.dot(dWi_dfhat, Ki) + np.eye(self.N) #Wi(Ki + W) = Wi__Ki_W using the last K prior given to fit_full #dytil_dfhat_explicit = self.Wi__Ki_W @@ -97,6 +110,8 @@ class Laplace(likelihood): dytil_dfhat = - np.diagflat(np.dot(dWi_dfhat, np.dot(Ki, self.f_hat))) + np.dot(self.Sigma_tilde, Ki) + np.eye(self.N) self.dytil_dfhat = dytil_dfhat + #dytil_dfhat = np.eye(dytil_dfhat.shape[0]) + self.dL_dfhat = np.dot(dL_dytil, dytil_dfhat) #FIXME: Purely for checkgradding.... return dL_dytil, dytil_dfhat def _Kgradients(self, dL_d_K_Sigma, dK_dthetaK): diff --git a/GPy/testing/laplace_approx_tests.py b/GPy/testing/laplace_approx_tests.py index 2b3af2ad..acb1c822 100644 --- a/GPy/testing/laplace_approx_tests.py +++ b/GPy/testing/laplace_approx_tests.py @@ -1,26 +1,29 @@ import unittest import numpy as np +np.random.seed(82) import GPy from GPy.models import GP from GPy.util.linalg import pdinv, tdot from scipy import linalg -class LikelihoodGradParam(GP): - def __init__(self, X, likelihood_function, kernel, param_name=None, function=None, **kwargs): - super(LikelihoodGradParam, self).__init__(X, likelihood_function, kernel) +class LikelihoodParamGrad(GP): + def __init__(self, X=None, likelihood_function=None, kernel=None, param_name=None, function=None, dparam_name=None, **kwargs): self.param_name = param_name + self.dparam_name = dparam_name self.func = function + super(LikelihoodParamGrad, self).__init__(X, likelihood_function, kernel) #self.func_params = kwargs #self.parameter = self.likelihood.__getattribute__(self.param_name) def _get_param_names(self): - f_hats = ["f_{}".format(i) for i in range(len(self.likelihood.f_hat))] - return f_hats + params = getattr(self.likelihood, self.dparam_name) + params_names = ["{}_{}".format(self.dparam_name, i) for i in range(len(params))] + return params_names def _get_params(self): - return np.hstack([np.squeeze(self.likelihood.f_hat)]) - #return np.hstack([self.likelihood.__getattribute__(self.param_name)]) + params = getattr(self.likelihood, self.dparam_name) + return np.hstack([params]) def hack_dL_dK(self): self.K = self.kern.K(self.X) @@ -38,29 +41,56 @@ class LikelihoodGradParam(GP): self.dL_dK = 0.5 * (tmp - self.D * self.Ki) def _set_params(self, x): - self.likelihood.f_hat = x.reshape(self.N, 1) + raise NotImplementedError + + def log_likelihood(self): + raise NotImplementedError + + def _log_likelihood_gradients(self): + raise NotImplementedError + + +class Likelihood_F_Grad(LikelihoodParamGrad): + def __init__(self, **kwargs): + super(Likelihood_F_Grad, self).__init__(**kwargs) + + def _set_params(self, x): + params = getattr(self.likelihood, self.dparam_name) + setattr(self.likelihood, self.dparam_name, x.reshape(*params.shape)) self.likelihood._compute_likelihood_variables() self.hack_dL_dK() def log_likelihood(self): - return self.func(self.likelihood)[0, 0] + ll = self.func(self) + if self.param_name == "dL_dfhat_": + import ipdb; ipdb.set_trace() ### XXX BREAKPOINT + if len(ll.shape) == 0 or len(ll.shape) == 1: + return ll.sum() + elif len(ll.shape) == 2: + #print "Only checking first likelihood" + return ll[0, 0] + else: + raise ValueError('Not implemented for larger matricies yet') + return ll def _log_likelihood_gradients(self): - #gradient = self.likelihood.__getattribute__(self.param_name) self.likelihood._compute_likelihood_variables() self.likelihood._gradients(partial=np.diag(self.dL_dK)) gradient = getattr(self.likelihood, self.param_name) - #Need to sum over fhats? For dytil_dfhat... - #gradient = np.flatten(gradient, axis=0) - #return gradient[:, 0] - return gradient[0, :] + if len(gradient.shape) == 1: + return gradient + elif len(gradient.shape) == 2: + #print "Only checking first gradients" + return gradient[0,: ] + else: + raise ValueError('Not implemented for larger matricies yet') class LaplaceTests(unittest.TestCase): def setUp(self): real_var = 0.1 #Start a function, any function - self.X = np.linspace(0.0, 10.0, 30)[:, None] + self.X = np.linspace(0.0, 10.0, 4)[:, None] #self.X = np.random.randn(,1) #self.X = np.ones((10,1)) Y = np.sin(self.X) + np.random.randn(*self.X.shape)*real_var @@ -74,49 +104,40 @@ class LaplaceTests(unittest.TestCase): t_distribution = GPy.likelihoods.likelihood_functions.student_t(deg_free, sigma=initial_sd_guess) self.stu_t_likelihood = GPy.likelihoods.Laplace(Y.copy(), t_distribution, rasm=True) self.stu_t_likelihood.fit_full(self.kernel.K(self.X)) - self.m = LikelihoodGradParam(self.X, self.stu_t_likelihood, self.kernel, None, None) - self.m.constrain_fixed('rbf_v', 1.0898) - self.m.constrain_fixed('rbf_l', 1.8651) def tearDown(self): self.m = None def test_dy_dfhat(self): - def ytil(likelihood): - Sigma_tilde = likelihood.Sigma_tilde - K = likelihood.K + def ytil(self): + Sigma_tilde = self.likelihood.Sigma_tilde + K = self.likelihood.K Ki, _, _, _ = pdinv(K) - f_hat = likelihood.f_hat + f_hat = self.likelihood.f_hat Sigma, _, _, _ = pdinv(Sigma_tilde) return np.dot(np.dot(Sigma_tilde, (Ki + Sigma)), f_hat) - self.m.func = ytil - self.m.param_name = 'dytil_dfhat' + self.m = Likelihood_F_Grad(X=self.X, likelihood_function=self.stu_t_likelihood, + kernel=self.kernel, param_name='dytil_dfhat', + function=ytil, dparam_name='f_hat') + #self.m.constrain_fixed('rbf_v', 1.0898) + #self.m.constrain_fixed('rbf_l', 1.8651) self.m.randomize() - #try: self.m.checkgrad(verbose=1) assert self.m.checkgrad() - #except: - #import ipdb;ipdb.set_trace() + def test_dL_dfhat(self): + def L(self): + return np.array(-0.5 * self.D * self.K_logdet + self._model_fit_term() + self.likelihood.Z) - #def test_dL_dytil(self): - #def L(likelihood): - ##-0.5 * self.D * self.K_logdet + self._model_fit_term() + self.likelihood.Z - #Sigma_tilde = likelihood.Sigma_tilde - #Ki = likelihood.K - #f_hat = likelihood.f_hat - #Sigma, _, _, _ = pdinv(Sigma_tilde) - #return np.dot(np.dot(Sigma_tilde, (Ki + Sigma)), f_hat) - - #self.m.func = L - #self.m.param_name = 'dL_dytil' - #m.randomize() - ##try: - #m.checkgrad(verbose=1) - #assert m.checkgrad() - #except: - #import ipdb;ipdb.set_trace() + self.m = Likelihood_F_Grad(X=self.X, likelihood_function=self.stu_t_likelihood, + kernel=self.kernel, param_name='dL_dfhat', + function=L, dparam_name='f_hat') + self.m.constrain_fixed('rbf_v', 1.0898) + self.m.constrain_fixed('rbf_l', 1.8651) + self.m.randomize() + self.m.checkgrad(verbose=1) + assert self.m.checkgrad() if __name__ == "__main__": unittest.main()