From 42f8180c4e52d62dc1013bfc4834e0c5faf43ee8 Mon Sep 17 00:00:00 2001 From: Alan Saul Date: Wed, 11 Sep 2013 15:27:14 +0100 Subject: [PATCH] Tidied up grad checking --- GPy/examples/laplace_approximations.py | 20 ++++---- GPy/likelihoods/laplace.py | 6 ++- GPy/likelihoods/likelihood_functions.py | 24 +++++----- GPy/testing/laplace_tests.py | 63 ++++++++++++++++--------- 4 files changed, 69 insertions(+), 44 deletions(-) diff --git a/GPy/examples/laplace_approximations.py b/GPy/examples/laplace_approximations.py index c0bc3aef..50e1858b 100644 --- a/GPy/examples/laplace_approximations.py +++ b/GPy/examples/laplace_approximations.py @@ -27,7 +27,7 @@ def timing(): t_distribution = GPy.likelihoods.functions.StudentT(deg_free, sigma2=edited_real_sd) corrupt_stu_t_likelihood = GPy.likelihoods.Laplace(Yc.copy(), t_distribution, opt='rasm') - m = GPy.models.GPRegression(X, corrupt_stu_t_likelihood, kernel1) + m = GPy.models.GPRegression(X, Yc.copy(), kernel1, likelihood=corrupt_stu_t_likelihood) m.ensure_default_constraints() m.update_likelihood_approximation() m.optimize() @@ -56,7 +56,7 @@ def v_fail_test(): print "Clean student t, rasm" t_distribution = GPy.likelihoods.functions.StudentT(deg_free, sigma2=edited_real_sd) stu_t_likelihood = GPy.likelihoods.Laplace(Y.copy(), t_distribution, opt='rasm') - m = GPy.models.GPRegression(X, stu_t_likelihood, kernel1) + m = GPy.models.GPRegression(X, Y.copy(), kernel1, likelihood=stu_t_likelihood) m.constrain_positive('') vs = 25 noises = 30 @@ -103,7 +103,7 @@ def student_t_obj_plane(): kernelst = kernelgp.copy() t_distribution = GPy.likelihoods.functions.StudentT(deg_free, sigma2=(real_std**2)) stu_t_likelihood = GPy.likelihoods.Laplace(Y.copy(), t_distribution, opt='rasm') - m = GPy.models.GPRegression(X, stu_t_likelihood, kernelst) + m = GPy.models.GPRegression(X, Y, kernelst, likelihood=stu_t_likelihood) m.ensure_default_constraints() m.constrain_fixed('t_no', real_std**2) vs = 10 @@ -156,7 +156,7 @@ def 
student_t_f_check(): #kernelst += GPy.kern.bias(X.shape[1]) t_distribution = GPy.likelihoods.functions.StudentT(deg_free, sigma2=0.05) stu_t_likelihood = GPy.likelihoods.Laplace(Y.copy(), t_distribution, opt='rasm') - m = GPy.models.GPRegression(X, stu_t_likelihood, kernelst) + m = GPy.models.GPRegression(X, Y.copy(), kernelst, likelihood=stu_t_likelihood) #m['rbf_v'] = mgp._get_params()[0] #m['rbf_l'] = mgp._get_params()[1] + 1 m.ensure_default_constraints() @@ -211,7 +211,7 @@ def student_t_fix_optimise_check(): plt.figure(1) plt.suptitle('Student likelihood') - m = GPy.models.GPRegression(X, stu_t_likelihood, kernelst) + m = GPy.models.GPRegression(X, Y, kernelst, likelihood=stu_t_likelihood) m.constrain_fixed('rbf_var', mgp._get_params()[0]) m.constrain_fixed('rbf_len', mgp._get_params()[1]) m.constrain_positive('t_noise') @@ -352,7 +352,7 @@ def debug_student_t_noise_approx(): t_distribution = GPy.likelihoods.functions.StudentT(deg_free, sigma2=edited_real_sd) stu_t_likelihood = GPy.likelihoods.Laplace(Y.copy(), t_distribution, opt='rasm') - m = GPy.models.GPRegression(X, stu_t_likelihood, kernel6) + m = GPy.models.GPRegression(X, Y, kernel6, likelihood=stu_t_likelihood) #m['rbf_len'] = 1.5 #m.constrain_fixed('rbf_v', 1.0898) #m.constrain_fixed('rbf_l', 0.2651) @@ -482,7 +482,7 @@ def student_t_approx(): print "Clean student t, rasm" t_distribution = GPy.likelihoods.functions.StudentT(deg_free, sigma2=edited_real_sd) stu_t_likelihood = GPy.likelihoods.Laplace(Y.copy(), t_distribution, opt='rasm') - m = GPy.models.GPRegression(X, Y.copy(), kernel6, stu_t_likelihood) + m = GPy.models.GPRegression(X, Y.copy(), kernel6, likelihood=stu_t_likelihood) m.ensure_default_constraints() m.constrain_positive('t_noise') m.randomize() @@ -498,7 +498,7 @@ def student_t_approx(): print "Corrupt student t, rasm" t_distribution = GPy.likelihoods.functions.StudentT(deg_free, sigma2=edited_real_sd) corrupt_stu_t_likelihood = GPy.likelihoods.Laplace(Yc.copy(), t_distribution, 
opt='rasm') - m = GPy.models.GPRegression(X, Yc.copy(), kernel4, corrupt_stu_t_likelihood) + m = GPy.models.GPRegression(X, Yc.copy(), kernel4, likelihood=corrupt_stu_t_likelihood) m.ensure_default_constraints() m.constrain_positive('t_noise') m.randomize() @@ -516,7 +516,7 @@ def student_t_approx(): #print "Clean student t, ncg" #t_distribution = GPy.likelihoods.functions.StudentT(deg_free, sigma2=edited_real_sd) #stu_t_likelihood = GPy.likelihoods.Laplace(Y, t_distribution, opt='ncg') - #m = GPy.models.GPRegression(X, stu_t_likelihood, kernel3) + #m = GPy.models.GPRegression(X, Y, kernel3, likelihood=stu_t_likelihood) #m.ensure_default_constraints() #m.update_likelihood_approximation() #m.optimize() @@ -530,7 +530,7 @@ def student_t_approx(): #print "Corrupt student t, ncg" #t_distribution = GPy.likelihoods.functions.StudentT(deg_free, sigma2=edited_real_sd) #corrupt_stu_t_likelihood = GPy.likelihoods.Laplace(Yc.copy(), t_distribution, opt='ncg') - #m = GPy.models.GPRegression(X, corrupt_stu_t_likelihood, kernel5) + #m = GPy.models.GPRegression(X, Y, kernel5, likelihood=corrupt_stu_t_likelihood) #m.ensure_default_constraints() #m.update_likelihood_approximation() #m.optimize() diff --git a/GPy/likelihoods/laplace.py b/GPy/likelihoods/laplace.py index b5b16521..2f98b2ff 100644 --- a/GPy/likelihoods/laplace.py +++ b/GPy/likelihoods/laplace.py @@ -41,9 +41,12 @@ class Laplace(likelihood): self.N, self.D = self.data.shape self.is_heteroscedastic = True self.Nparams = 0 - self.NORMAL_CONST = ((0.5 * self.N) * np.log(2 * np.pi)) + self.restart() + + + def restart(self): #Initial values for the GP variables self.Y = np.zeros((self.N, 1)) self.covariance_matrix = np.eye(self.N) @@ -53,6 +56,7 @@ class Laplace(likelihood): self.old_a = None + def predictive_values(self, mu, var, full_cov): if full_cov: raise NotImplementedError("Cannot make correlated predictions with an Laplace likelihood") diff --git a/GPy/likelihoods/likelihood_functions.py 
b/GPy/likelihoods/likelihood_functions.py index 9d4dc041..330116de 100644 --- a/GPy/likelihoods/likelihood_functions.py +++ b/GPy/likelihoods/likelihood_functions.py @@ -280,7 +280,7 @@ class StudentT(LikelihoodFunction): ) return d3lik_d3f - def lik_dstd(self, y, f, extra_data=None): + def dlik_dvar(self, y, f, extra_data=None): """ Gradient of the likelihood (lik) w.r.t sigma parameter (standard deviation) @@ -291,10 +291,10 @@ class StudentT(LikelihoodFunction): """ assert y.shape == f.shape e = y - f - dlik_dsigma = self.v*(e**2 - self.sigma2)/(2*self.sigma2*(self.sigma2*self.v + e**2)) - return dlik_dsigma + dlik_dvar = self.v*(e**2 - self.sigma2)/(2*self.sigma2*(self.sigma2*self.v + e**2)) + return dlik_dvar - def dlik_df_dstd(self, y, f, extra_data=None): + def dlik_df_dvar(self, y, f, extra_data=None): """ Gradient of the dlik_df w.r.t sigma parameter (standard deviation) @@ -302,10 +302,10 @@ class StudentT(LikelihoodFunction): """ assert y.shape == f.shape e = y - f - dlik_grad_dsigma = (self.v*(self.v+1)*(-e))/((self.sigma2*self.v + e**2)**2) - return dlik_grad_dsigma + dlik_grad_dvar = (self.v*(self.v+1)*(-e))/((self.sigma2*self.v + e**2)**2) + return dlik_grad_dvar - def d2lik_d2f_dstd(self, y, f, extra_data=None): + def d2lik_d2f_dvar(self, y, f, extra_data=None): """ Gradient of the hessian (d2lik_d2f) w.r.t sigma parameter (standard deviation) @@ -313,16 +313,16 @@ class StudentT(LikelihoodFunction): """ assert y.shape == f.shape e = y - f - dlik_hess_dsigma = ( (self.v*(self.v+1)*(self.sigma2*self.v - 3*(e**2))) + dlik_hess_dvar = ( (self.v*(self.v+1)*(self.sigma2*self.v - 3*(e**2))) / ((self.sigma2*self.v + (e**2))**3) ) - return dlik_hess_dsigma + return dlik_hess_dvar def _gradients(self, y, f, extra_data=None): #must be listed in same order as 'get_param_names' - derivs = ([self.lik_dstd(y, f, extra_data=extra_data)], - [self.dlik_df_dstd(y, f, extra_data=extra_data)], - [self.d2lik_d2f_dstd(y, f, extra_data=extra_data)] + derivs = 
([self.dlik_dvar(y, f, extra_data=extra_data)], + [self.dlik_df_dvar(y, f, extra_data=extra_data)], + [self.d2lik_d2f_dvar(y, f, extra_data=extra_data)] ) # lists as we might learn many parameters # ensure we have gradients for every parameter we want to optimize assert len(derivs[0]) == len(self._get_param_names()) diff --git a/GPy/testing/laplace_tests.py b/GPy/testing/laplace_tests.py index 351cfcbb..8aabe50a 100644 --- a/GPy/testing/laplace_tests.py +++ b/GPy/testing/laplace_tests.py @@ -4,6 +4,24 @@ import GPy from GPy.models import GradientChecker import functools +def dparam_partial(inst_func, *args): + """ + If we have an instance method that needs to be called but that doesn't + take the parameter we wish to change to checkgrad, then this function + will change the variable using set params. + + inst_func: should be an instance function of an object that we would like + to change + param: the param that will be given to set_params + args: anything else that needs to be given to the function (for example + the f or Y that are being used in the function whilst we tweak the + param) + """ + def param_func(param, inst_func, args): + inst_func.im_self._set_params(param) + return inst_func(*args) + return functools.partial(param_func, inst_func=inst_func, args=args) + class LaplaceTests(unittest.TestCase): def setUp(self): self.N = 5 @@ -24,6 +42,7 @@ class LaplaceTests(unittest.TestCase): grad = GradientChecker(link, dlik_df, self.f.copy(), 'f') grad.randomize() grad.checkgrad(verbose=1) + self.assertTrue(grad.checkgrad()) def test_gaussian_d2lik_d2f(self): var = 0.1 @@ -33,6 +52,7 @@ class LaplaceTests(unittest.TestCase): grad = GradientChecker(dlik_df, d2lik_d2f, self.f.copy(), 'f') grad.randomize() grad.checkgrad(verbose=1) + self.assertTrue(grad.checkgrad()) def test_gaussian_d3lik_d3f(self): var = 0.1 @@ -42,42 +62,43 @@ class LaplaceTests(unittest.TestCase): grad = GradientChecker(d2lik_d2f, d3lik_d3f, self.f.copy(), 'f') grad.randomize()
grad.checkgrad(verbose=1) + self.assertTrue(grad.checkgrad()) def test_gaussian_dlik_dvar(self): var = 0.1 gauss = GPy.likelihoods.functions.Gaussian(var, self.D, self.N) - #Since the function we are checking does not directly accept the variable we wish to tweak - #We make function which makes the change (set params) then calls the function - def p_link_var(var, likelihood, f, Y): - likelihood._set_params(var) - return likelihood.link_function(f, Y) - def p_dlik_dvar(var, likelihood, f, Y): - likelihood._set_params(var) - return likelihood.dlik_dvar(f, Y) - - link = functools.partial(p_link_var, likelihood=gauss, f=self.f, Y=self.Y) - dlik_dvar = functools.partial(p_dlik_dvar, likelihood=gauss, f=self.f, Y=self.Y) + link = dparam_partial(gauss.link_function, self.Y, self.f) + dlik_dvar = dparam_partial(gauss.dlik_dvar, self.Y, self.f) grad = GradientChecker(link, dlik_dvar, var, 'v') + grad.constrain_positive('v') grad.randomize() grad.checkgrad(verbose=1) + #self.assertTrue(grad.checkgrad()) def test_gaussian_dlik_df_dvar(self): var = 0.1 gauss = GPy.likelihoods.functions.Gaussian(var, self.D, self.N) - def p_dlik_df(var, likelihood, f, Y): - likelihood._set_params(var) - return likelihood.dlik_df(f, Y) - def p_dlik_df_dstd(var, likelihood, f, Y): - likelihood._set_params(var) - return likelihood.dlik_df_dvar(f, Y) - - dlik_df = functools.partial(p_dlik_df, likelihood=gauss, f=self.f, Y=self.Y) - dlik_df_dstd = functools.partial(p_dlik_df_dstd, likelihood=gauss, f=self.f, Y=self.Y) - grad = GradientChecker(dlik_df, dlik_df_dstd, var, 'v') + dlik_df = dparam_partial(gauss.dlik_df, self.Y, self.f) + dlik_df_dvar = dparam_partial(gauss.dlik_df_dvar, self.Y, self.f) + grad = GradientChecker(dlik_df, dlik_df_dvar, var, 'v') + grad.constrain_positive('v') grad.randomize() grad.checkgrad(verbose=1) + #self.assertTrue(grad.checkgrad()) + + def test_studentt_dlik_dvar(self): + var = 0.1 + stu_t = GPy.likelihoods.functions.StudentT(deg_free=5, sigma2=var) + + link = 
dparam_partial(stu_t.link_function, self.Y, self.f) + dlik_dvar = dparam_partial(stu_t.dlik_dvar, self.Y, self.f) + grad = GradientChecker(link, dlik_dvar, var, 'v') + grad.constrain_positive('v') + grad.randomize() + grad.checkgrad(verbose=1) + #self.assertTrue(grad.checkgrad()) if __name__ == "__main__": print "Running unit tests"