From 5c9587404d27af60e5be0df5f630d1d4e02fd064 Mon Sep 17 00:00:00 2001
From: Alan Saul
Date: Fri, 10 Apr 2015 14:58:02 +0100
Subject: [PATCH] Added some numerical stability to link functions with tests for link functions

---
Review notes (commentary area, ignored by git-am):
 - safe_square/safe_cube/safe_quad/safe_three_times now clip on both sides,
   so large negative inputs no longer overflow.
 - Tests: print() call form, np.errstate context manager, d3transf_df3 is
   included in the overflow check, repeated gradient checks share a helper.
 - Line structure was restored from a whitespace-collapsed copy: context
   indentation is assumed (verify against the target tree before applying)
   and the post-image blob ids on the index lines were not recomputed.

 GPy/likelihoods/link_functions.py  |  59 +++++++-----
 GPy/testing/link_function_tests.py | 120 ++++++++++++++++++++++++
 GPy/util/misc.py                   |  27 +++++-
 3 files changed, 179 insertions(+), 27 deletions(-)
 create mode 100644 GPy/testing/link_function_tests.py

diff --git a/GPy/likelihoods/link_functions.py b/GPy/likelihoods/link_functions.py
index 60e260e7..03495c7e 100644
--- a/GPy/likelihoods/link_functions.py
+++ b/GPy/likelihoods/link_functions.py
@@ -5,9 +5,8 @@ import numpy as np
 from scipy import stats
 import scipy as sp
 from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf,inv_std_norm_cdf
-
-_exp_lim_val = np.finfo(np.float64).max
-_lim_val = np.log(_exp_lim_val)
+from scipy.special import cbrt
+from ..util.misc import safe_exp, safe_square, safe_cube, safe_quad, safe_three_times
 
 class GPTransformation(object):
     """
@@ -70,7 +69,7 @@ class Probit(GPTransformation):
     .. math::
 
         g(f) = \\Phi^{-1} (mu)
-    
+
     """
     def transf(self,f):
         return std_norm_cdf(f)
@@ -84,7 +83,7 @@ class Probit(GPTransformation):
 
     def d3transf_df3(self,f):
         #FIXME
-        f2 = f**2
+        f2 = safe_square(f)
         return -(1/(np.sqrt(2*np.pi)))*np.exp(-0.5*(f2))*(1-f2)
 
 
@@ -98,22 +97,26 @@ class Cloglog(GPTransformation):
     or
 
         f = \log (-\log(1-p))
-    
+
     """
     def transf(self,f):
-        return 1-np.exp(-np.exp(f))
+        ef = safe_exp(f)
+        return 1-np.exp(-ef)
 
     def dtransf_df(self,f):
-        return np.exp(f-np.exp(f))
+        ef = safe_exp(f)
+        return np.exp(f-ef)
 
     def d2transf_df2(self,f):
-        ef = np.exp(f)
+        ef = safe_exp(f)
         return -np.exp(f-ef)*(ef-1.)
 
     def d3transf_df3(self,f):
-        ef = np.exp(f)
-        return np.exp(f-ef)*(1.-3*ef + ef**2)
-        
+        ef = safe_exp(f)
+        ef2 = safe_square(ef)
+        three_times_ef = safe_three_times(ef)
+        r_val = np.exp(f-ef)*(1.-three_times_ef + ef2)
+        return r_val
 
 class Log(GPTransformation):
     """
@@ -123,16 +126,16 @@ class Log(GPTransformation):
 
     """
     def transf(self,f):
-        return np.exp(np.clip(f, -_lim_val, _lim_val))
+        return safe_exp(f)
 
     def dtransf_df(self,f):
-        return np.exp(np.clip(f, -_lim_val, _lim_val))
+        return safe_exp(f)
 
     def d2transf_df2(self,f):
-        return np.exp(np.clip(f, -_lim_val, _lim_val))
+        return safe_exp(f)
 
     def d3transf_df3(self,f):
-        return np.exp(np.clip(f, -_lim_val, _lim_val))
+        return safe_exp(f)
 
 class Log_ex_1(GPTransformation):
     """
@@ -142,17 +145,20 @@ class Log_ex_1(GPTransformation):
 
     """
     def transf(self,f):
-        return np.log(1.+np.exp(f))
+        return np.log1p(safe_exp(f))
 
     def dtransf_df(self,f):
-        return np.exp(f)/(1.+np.exp(f))
+        ef = safe_exp(f)
+        return ef/(1.+ef)
 
     def d2transf_df2(self,f):
-        aux = np.exp(f)/(1.+np.exp(f))
+        ef = safe_exp(f)
+        aux = ef/(1.+ef)
         return aux*(1.-aux)
 
     def d3transf_df3(self,f):
-        aux = np.exp(f)/(1.+np.exp(f))
+        ef = safe_exp(f)
+        aux = ef/(1.+ef)
         daux_df = aux*(1.-aux)
         return daux_df - (2.*aux*daux_df)
 
@@ -160,14 +166,17 @@ class Reciprocal(GPTransformation):
     def transf(self,f):
         return 1./f
 
-    def dtransf_df(self,f):
-        return -1./(f**2)
+    def dtransf_df(self, f):
+        f2 = safe_square(f)
+        return -1./f2
 
-    def d2transf_df2(self,f):
-        return 2./(f**3)
+    def d2transf_df2(self, f):
+        f3 = safe_cube(f)
+        return 2./f3
 
     def d3transf_df3(self,f):
-        return -6./(f**4)
+        f4 = safe_quad(f)
+        return -6./f4
 
 class Heaviside(GPTransformation):
     """
diff --git a/GPy/testing/link_function_tests.py b/GPy/testing/link_function_tests.py
new file mode 100644
index 00000000..fb8fba99
--- /dev/null
+++ b/GPy/testing/link_function_tests.py
@@ -0,0 +1,120 @@
+import unittest
+import numpy as np
+import scipy as sp
+from scipy.special import cbrt
+from GPy.models import GradientChecker
+_lim_val = np.finfo(np.float64).max
+_lim_val_exp = np.log(_lim_val)
+_lim_val_square = np.sqrt(_lim_val)
+_lim_val_cube = cbrt(_lim_val)
+from GPy.likelihoods.link_functions import Identity, Probit, Cloglog, Log, Log_ex_1, Reciprocal, Heaviside
+
+class LinkFunctionTests(unittest.TestCase):
+    def setUp(self):
+        self.small_f = np.array([[-1e-4]])
+        self.zero_f = np.array([[1e-4]])
+        self.mid_f = np.array([[5.0]])
+        self.large_f = np.array([[1e4]])
+        self.f_lower_lim = np.array(-np.inf)
+        self.f_upper_lim = np.array(np.inf)
+
+    def _check_derivative_chain(self, link_func, x0):
+        """Gradient-check transf and each derivative against the next one at x0."""
+        chain = [link_func.transf, link_func.dtransf_df,
+                 link_func.d2transf_df2, link_func.d3transf_df3]
+        for func, dfunc in zip(chain[:-1], chain[1:]):
+            grad = GradientChecker(func, dfunc, x0=x0)
+            self.assertTrue(grad.checkgrad(verbose=True))
+
+    def check_gradient(self, link_func, lim_of_inf, test_lim=False):
+        """Check the derivative chain at mid, small, near-zero and large f, optionally at the limit."""
+        for x0 in (self.mid_f, self.small_f, self.zero_f):
+            self._check_derivative_chain(link_func, x0)
+
+        #Do a limit test if the large f value is too large
+        large_f = np.clip(self.large_f, -np.inf, lim_of_inf-1e-3)
+        self._check_derivative_chain(link_func, large_f)
+
+        if test_lim:
+            print("Testing limits")
+            #Remove some otherwise we are too close to the limit for gradcheck to work effectively
+            lim_of_inf = lim_of_inf - 1e-4
+            self._check_derivative_chain(link_func, lim_of_inf)
+
+    def check_overflow(self, link_func, lim_of_inf):
+        """Assert transf and its three derivatives are finite at lim_of_inf + 100."""
+        #Check that it does something sensible beyond this limit,
+        #note this is not checking the value is correct, just that it is neither inf nor nan
+        beyond_lim_of_inf = lim_of_inf + 100.0
+        for func in (link_func.transf, link_func.dtransf_df,
+                     link_func.d2transf_df2, link_func.d3transf_df3):
+            self.assertTrue(np.isfinite(func(beyond_lim_of_inf)))
+
+    def test_log_overflow(self):
+        link = Log()
+        lim_of_inf = _lim_val_exp
+
+        np.testing.assert_almost_equal(np.exp(self.mid_f), link.transf(self.mid_f))
+        self.assertTrue(np.isinf(np.exp(np.log(self.f_upper_lim))))
+        #Check the clipping works
+        np.testing.assert_almost_equal(link.transf(self.f_lower_lim), 0, decimal=5)
+        #Need to look at most significant figures here rather than the decimals
+        np.testing.assert_approx_equal(link.transf(self.f_upper_lim), _lim_val, significant=5)
+        self.check_overflow(link, lim_of_inf)
+
+        #Check that it would otherwise fail
+        beyond_lim_of_inf = lim_of_inf + 10.0
+        #errstate restores the previous error handling even if the assertion fails
+        with np.errstate(over='ignore'):
+            self.assertTrue(np.isinf(np.exp(beyond_lim_of_inf)))
+
+    def test_log_ex_1_overflow(self):
+        link = Log_ex_1()
+        lim_of_inf = _lim_val_exp
+
+        np.testing.assert_almost_equal(np.log1p(np.exp(self.mid_f)), link.transf(self.mid_f))
+        self.assertTrue(np.isinf(np.log1p(np.exp(np.log(self.f_upper_lim)))))
+        #Check the clipping works
+        np.testing.assert_almost_equal(link.transf(self.f_lower_lim), 0, decimal=5)
+        #Need to look at most significant figures here rather than the decimals
+        np.testing.assert_approx_equal(link.transf(self.f_upper_lim), np.log1p(_lim_val), significant=5)
+        self.check_overflow(link, lim_of_inf)
+
+        #Check that it would otherwise fail
+        beyond_lim_of_inf = lim_of_inf + 10.0
+        with np.errstate(over='ignore'):
+            self.assertTrue(np.isinf(np.log1p(np.exp(beyond_lim_of_inf))))
+
+    def test_log_gradients(self):
+        # transf dtransf_df d2transf_df2 d3transf_df3
+        link = Log()
+        lim_of_inf = _lim_val_exp
+        self.check_gradient(link, lim_of_inf, test_lim=True)
+
+    def test_identity_gradients(self):
+        link = Identity()
+        lim_of_inf = _lim_val
+        #FIXME: Should be able to think of a way to test the limits of this
+        self.check_gradient(link, lim_of_inf, test_lim=False)
+
+    def test_probit_gradients(self):
+        link = Probit()
+        lim_of_inf = _lim_val
+        self.check_gradient(link, lim_of_inf, test_lim=True)
+
+    def test_Cloglog_gradients(self):
+        link = Cloglog()
+        lim_of_inf = _lim_val_exp
+        self.check_gradient(link, lim_of_inf, test_lim=True)
+
+    def test_Log_ex_1_gradients(self):
+        link = Log_ex_1()
+        lim_of_inf = _lim_val_exp
+        self.check_gradient(link, lim_of_inf, test_lim=True)
+        self.check_overflow(link, lim_of_inf)
+
+    def test_reciprocal_gradients(self):
+        link = Reciprocal()
+        lim_of_inf = _lim_val
+        #Does not work with much smaller values, and values closer to zero than 1e-5
+        self.check_gradient(link, lim_of_inf, test_lim=True)
diff --git a/GPy/util/misc.py b/GPy/util/misc.py
index 37e19b9f..3b88da48 100644
--- a/GPy/util/misc.py
+++ b/GPy/util/misc.py
@@ -6,15 +6,38 @@ from scipy.special import cbrt
 from .config import *
 
 _lim_val = np.finfo(np.float64).max
-
 _lim_val_exp = np.log(_lim_val)
 _lim_val_square = np.sqrt(_lim_val)
-_lim_val_cube = cbrt(_lim_val)
+# Each limit is stepped one ulp towards -inf; the intent is that raising it
+# back to its power (or multiplying by three) does not round up to inf.
+_lim_val_cube = np.nextafter(_lim_val**(1/3.0), -np.inf)
+_lim_val_quad = np.nextafter(_lim_val**(1/4.0), -np.inf)
+_lim_val_three_times = np.nextafter(_lim_val/3.0, -np.inf)
 
 def safe_exp(f):
     clip_f = np.clip(f, -np.inf, _lim_val_exp)
     return np.exp(clip_f)
 
+def safe_square(f):
+    """Return f**2 with f clipped to [-_lim_val_square, _lim_val_square]."""
+    f = np.clip(f, -_lim_val_square, _lim_val_square)
+    return f**2
+
+def safe_cube(f):
+    """Return f**3 with f clipped to [-_lim_val_cube, _lim_val_cube]."""
+    f = np.clip(f, -_lim_val_cube, _lim_val_cube)
+    return f**3
+
+def safe_quad(f):
+    """Return f**4 with f clipped to [-_lim_val_quad, _lim_val_quad]."""
+    f = np.clip(f, -_lim_val_quad, _lim_val_quad)
+    return f**4
+
+def safe_three_times(f):
+    """Return 3*f with f clipped to [-_lim_val_three_times, _lim_val_three_times]."""
+    f = np.clip(f, -_lim_val_three_times, _lim_val_three_times)
+    return 3*f
+
 def chain_1(df_dg, dg_dx):
     """
     Generic chaining function for first derivative