Added numerical-stability safeguards to the link functions, together with tests for them

This commit is contained in:
Alan Saul 2015-04-10 14:58:02 +01:00
parent 034d141d63
commit 5c9587404d
3 changed files with 197 additions and 27 deletions

View file

@ -5,9 +5,8 @@ import numpy as np
from scipy import stats
import scipy as sp
from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf,inv_std_norm_cdf
_exp_lim_val = np.finfo(np.float64).max
_lim_val = np.log(_exp_lim_val)
from scipy.special import cbrt
from ..util.misc import safe_exp, safe_square, safe_cube, safe_quad, safe_three_times
class GPTransformation(object):
"""
@ -70,7 +69,7 @@ class Probit(GPTransformation):
.. math::
g(f) = \\Phi^{-1} (mu)
"""
def transf(self,f):
return std_norm_cdf(f)
@ -84,7 +83,7 @@ class Probit(GPTransformation):
def d3transf_df3(self,f):
#FIXME
f2 = f**2
f2 = safe_square(f)
return -(1/(np.sqrt(2*np.pi)))*np.exp(-0.5*(f2))*(1-f2)
@ -98,22 +97,26 @@ class Cloglog(GPTransformation):
or
f = \log (-\log(1-p))
"""
def transf(self,f):
return 1-np.exp(-np.exp(f))
ef = safe_exp(f)
return 1-np.exp(-ef)
def dtransf_df(self,f):
return np.exp(f-np.exp(f))
ef = safe_exp(f)
return np.exp(f-ef)
def d2transf_df2(self,f):
ef = np.exp(f)
ef = safe_exp(f)
return -np.exp(f-ef)*(ef-1.)
def d3transf_df3(self,f):
ef = np.exp(f)
return np.exp(f-ef)*(1.-3*ef + ef**2)
ef = safe_exp(f)
ef2 = safe_square(ef)
three_times_ef = safe_three_times(ef)
r_val = np.exp(f-ef)*(1.-three_times_ef + ef2)
return r_val
class Log(GPTransformation):
"""
@ -123,16 +126,16 @@ class Log(GPTransformation):
"""
def transf(self,f):
return np.exp(np.clip(f, -_lim_val, _lim_val))
return safe_exp(f)
def dtransf_df(self,f):
return np.exp(np.clip(f, -_lim_val, _lim_val))
return safe_exp(f)
def d2transf_df2(self,f):
return np.exp(np.clip(f, -_lim_val, _lim_val))
return safe_exp(f)
def d3transf_df3(self,f):
return np.exp(np.clip(f, -_lim_val, _lim_val))
return safe_exp(f)
class Log_ex_1(GPTransformation):
"""
@ -142,17 +145,20 @@ class Log_ex_1(GPTransformation):
"""
def transf(self,f):
return np.log(1.+np.exp(f))
return np.log1p(safe_exp(f))
def dtransf_df(self,f):
return np.exp(f)/(1.+np.exp(f))
ef = safe_exp(f)
return ef/(1.+ef)
def d2transf_df2(self,f):
aux = np.exp(f)/(1.+np.exp(f))
ef = safe_exp(f)
aux = ef/(1.+ef)
return aux*(1.-aux)
def d3transf_df3(self,f):
aux = np.exp(f)/(1.+np.exp(f))
ef = safe_exp(f)
aux = ef/(1.+ef)
daux_df = aux*(1.-aux)
return daux_df - (2.*aux*daux_df)
@ -160,14 +166,17 @@ class Reciprocal(GPTransformation):
def transf(self,f):
return 1./f
def dtransf_df(self,f):
return -1./(f**2)
def dtransf_df(self, f):
f2 = safe_square(f)
return -1./f2
def d2transf_df2(self,f):
return 2./(f**3)
def d2transf_df2(self, f):
f3 = safe_cube(f)
return 2./f3
def d3transf_df3(self,f):
return -6./(f**4)
f4 = safe_quad(f)
return -6./f4
class Heaviside(GPTransformation):
"""

View file

@ -0,0 +1,143 @@
import numpy as np
import scipy as sp
from scipy.special import cbrt
from GPy.models import GradientChecker
_lim_val = np.finfo(np.float64).max
_lim_val_exp = np.log(_lim_val)
_lim_val_square = np.sqrt(_lim_val)
_lim_val_cube = cbrt(_lim_val)
from GPy.likelihoods.link_functions import Identity, Probit, Cloglog, Log, Log_ex_1, Reciprocal, Heaviside
class LinkFunctionTests(np.testing.TestCase):
    """
    Gradient and overflow checks for the link functions.

    Each link function exposes transf, dtransf_df, d2transf_df2 and
    d3transf_df3; the tests verify that every one of the last three is the
    derivative of the one before it, and that the clipped implementations do
    not return inf or nan for inputs beyond the overflow limit.
    """

    def setUp(self):
        # Inputs just either side of zero, a moderate value and a large value
        self.small_f = np.array([[-1e-4]])
        self.zero_f = np.array([[1e-4]])
        self.mid_f = np.array([[5.0]])
        self.large_f = np.array([[1e4]])
        self.f_lower_lim = np.array(-np.inf)
        self.f_upper_lim = np.array(np.inf)

    def _check_derivative_chain(self, link_func, x0):
        """
        Gradient-check transf, dtransf_df and d2transf_df2 of link_func at x0,
        each against the next higher derivative method.
        """
        pairs = (
            (link_func.transf, link_func.dtransf_df),
            (link_func.dtransf_df, link_func.d2transf_df2),
            (link_func.d2transf_df2, link_func.d3transf_df3),
        )
        for func, dfunc in pairs:
            grad = GradientChecker(func, dfunc, x0=x0)
            self.assertTrue(grad.checkgrad(verbose=True))

    def check_gradient(self, link_func, lim_of_inf, test_lim=False):
        """
        Run the derivative checks for link_func at the mid, small, near-zero
        and large test inputs.

        :param link_func: link function instance under test
        :param lim_of_inf: largest input the link function is expected to handle
        :param test_lim: if True, additionally check just below lim_of_inf
        """
        for x0 in (self.mid_f, self.small_f, self.zero_f):
            self._check_derivative_chain(link_func, x0)

        #Do a limit test if the large f value is too large
        large_f = np.clip(self.large_f, -np.inf, lim_of_inf-1e-3)
        self._check_derivative_chain(link_func, large_f)

        if test_lim:
            # Parenthesised single-argument form prints the same text under
            # both Python 2 and Python 3
            print("Testing limits")
            #Remove some otherwise we are too close to the limit for gradcheck to work effectively
            lim_of_inf = lim_of_inf - 1e-4
            self._check_derivative_chain(link_func, lim_of_inf)

    def check_overflow(self, link_func, lim_of_inf):
        """
        Check that link_func and its three derivatives return neither inf nor
        nan for an input 100.0 beyond lim_of_inf.
        """
        #Check that it does something sensible beyond this limit,
        #note this is not checking the value is correct, just that it isn't nan
        beyond_lim_of_inf = lim_of_inf + 100.0
        funcs = (
            link_func.transf,
            link_func.dtransf_df,
            link_func.d2transf_df2,
            link_func.d3transf_df3,
        )
        for func in funcs:
            value = func(beyond_lim_of_inf)
            self.assertFalse(np.isinf(value))
            self.assertFalse(np.isnan(value))

    def test_log_overflow(self):
        link = Log()
        lim_of_inf = _lim_val_exp

        np.testing.assert_almost_equal(np.exp(self.mid_f), link.transf(self.mid_f))
        self.assertTrue(np.isinf(np.exp(np.log(self.f_upper_lim))))
        #Check the clipping works
        np.testing.assert_almost_equal(link.transf(self.f_lower_lim), 0, decimal=5)
        #Need to look at most significant figures here rather than the decimals
        np.testing.assert_approx_equal(link.transf(self.f_upper_lim), _lim_val, significant=5)
        self.check_overflow(link, lim_of_inf)

        #Check that it would otherwise fail
        beyond_lim_of_inf = lim_of_inf + 10.0
        # errstate restores the previous error handling even if the assertion fails
        with np.errstate(over='ignore'):
            self.assertTrue(np.isinf(np.exp(beyond_lim_of_inf)))

    def test_log_ex_1_overflow(self):
        link = Log_ex_1()
        lim_of_inf = _lim_val_exp

        np.testing.assert_almost_equal(np.log1p(np.exp(self.mid_f)), link.transf(self.mid_f))
        self.assertTrue(np.isinf(np.log1p(np.exp(np.log(self.f_upper_lim)))))
        #Check the clipping works
        np.testing.assert_almost_equal(link.transf(self.f_lower_lim), 0, decimal=5)
        #Need to look at most significant figures here rather than the decimals
        np.testing.assert_approx_equal(link.transf(self.f_upper_lim), np.log1p(_lim_val), significant=5)
        self.check_overflow(link, lim_of_inf)

        #Check that it would otherwise fail
        beyond_lim_of_inf = lim_of_inf + 10.0
        # errstate restores the previous error handling even if the assertion fails
        with np.errstate(over='ignore'):
            self.assertTrue(np.isinf(np.log1p(np.exp(beyond_lim_of_inf))))

    def test_log_gradients(self):
        # transf dtransf_df d2transf_df2 d3transf_df3
        link = Log()
        lim_of_inf = _lim_val_exp
        self.check_gradient(link, lim_of_inf, test_lim=True)

    def test_identity_gradients(self):
        link = Identity()
        lim_of_inf = _lim_val
        #FIXME: Should be able to think of a way to test the limits of this
        self.check_gradient(link, lim_of_inf, test_lim=False)

    def test_probit_gradients(self):
        link = Probit()
        lim_of_inf = _lim_val
        self.check_gradient(link, lim_of_inf, test_lim=True)

    def test_Cloglog_gradients(self):
        link = Cloglog()
        lim_of_inf = _lim_val_exp
        self.check_gradient(link, lim_of_inf, test_lim=True)

    def test_Log_ex_1_gradients(self):
        link = Log_ex_1()
        lim_of_inf = _lim_val_exp
        self.check_gradient(link, lim_of_inf, test_lim=True)
        self.check_overflow(link, lim_of_inf)

    def test_reciprocal_gradients(self):
        link = Reciprocal()
        lim_of_inf = _lim_val
        #Does not work with much smaller values, and values closer to zero than 1e-5
        self.check_gradient(link, lim_of_inf, test_lim=True)

View file

@ -6,15 +6,33 @@ from scipy.special import cbrt
from .config import *
# Largest finite float64, and the largest magnitudes that can be passed to
# the matching operation below without the result overflowing.
_lim_val = np.finfo(np.float64).max
_lim_val_exp = np.log(_lim_val)
_lim_val_square = np.sqrt(_lim_val)
# These limits are moved one representable float towards -inf, presumably so
# that rounding in the root/division cannot leave a limit whose power or
# multiple exceeds _lim_val.
_lim_val_cube = np.nextafter(_lim_val**(1/3.0), -np.inf)
_lim_val_quad = np.nextafter(_lim_val**(1/4.0), -np.inf)
_lim_val_three_times = np.nextafter(_lim_val/3.0, -np.inf)


def safe_exp(f):
    """
    Exponential with the argument clipped from above at log(float64 max).

    No lower clip is applied: np.exp of a large negative value is 0.

    :param f: scalar or array of exponents
    :returns: np.exp of the clipped input
    """
    clip_f = np.clip(f, -np.inf, _lim_val_exp)
    return np.exp(clip_f)


def safe_square(f):
    """
    Square of f with the magnitude of f clipped to sqrt(float64 max).

    The clip is symmetric, so large negative inputs are limited as well as
    large positive ones.

    :param f: scalar or array
    :returns: clipped f raised to the power 2
    """
    f = np.clip(f, -_lim_val_square, _lim_val_square)
    return f**2


def safe_cube(f):
    """
    Cube of f with the magnitude of f clipped to just under the cube root of
    float64 max. The clip is symmetric.

    :param f: scalar or array
    :returns: clipped f raised to the power 3
    """
    f = np.clip(f, -_lim_val_cube, _lim_val_cube)
    return f**3


def safe_quad(f):
    """
    Fourth power of f with the magnitude of f clipped to just under the
    fourth root of float64 max. The clip is symmetric.

    :param f: scalar or array
    :returns: clipped f raised to the power 4
    """
    f = np.clip(f, -_lim_val_quad, _lim_val_quad)
    return f**4


def safe_three_times(f):
    """
    Three times f with the magnitude of f clipped to just under a third of
    float64 max. The clip is symmetric.

    :param f: scalar or array
    :returns: 3 multiplied by the clipped f
    """
    f = np.clip(f, -_lim_val_three_times, _lim_val_three_times)
    return 3*f
def chain_1(df_dg, dg_dx):
"""
Generic chaining function for first derivative