mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-05-05 17:52:39 +02:00
Added some numerical stability to link functions with tests for link functions
This commit is contained in:
parent
034d141d63
commit
5c9587404d
3 changed files with 197 additions and 27 deletions
|
|
@ -5,9 +5,8 @@ import numpy as np
|
||||||
from scipy import stats
|
from scipy import stats
|
||||||
import scipy as sp
|
import scipy as sp
|
||||||
from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf,inv_std_norm_cdf
|
from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf,inv_std_norm_cdf
|
||||||
|
from scipy.special import cbrt
|
||||||
_exp_lim_val = np.finfo(np.float64).max
|
from ..util.misc import safe_exp, safe_square, safe_cube, safe_quad, safe_three_times
|
||||||
_lim_val = np.log(_exp_lim_val)
|
|
||||||
|
|
||||||
class GPTransformation(object):
|
class GPTransformation(object):
|
||||||
"""
|
"""
|
||||||
|
|
@ -84,7 +83,7 @@ class Probit(GPTransformation):
|
||||||
|
|
||||||
def d3transf_df3(self,f):
|
def d3transf_df3(self,f):
|
||||||
#FIXME
|
#FIXME
|
||||||
f2 = f**2
|
f2 = safe_square(f)
|
||||||
return -(1/(np.sqrt(2*np.pi)))*np.exp(-0.5*(f2))*(1-f2)
|
return -(1/(np.sqrt(2*np.pi)))*np.exp(-0.5*(f2))*(1-f2)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -101,19 +100,23 @@ class Cloglog(GPTransformation):
|
||||||
|
|
||||||
"""
|
"""
|
||||||
def transf(self,f):
|
def transf(self,f):
|
||||||
return 1-np.exp(-np.exp(f))
|
ef = safe_exp(f)
|
||||||
|
return 1-np.exp(-ef)
|
||||||
|
|
||||||
def dtransf_df(self,f):
|
def dtransf_df(self,f):
|
||||||
return np.exp(f-np.exp(f))
|
ef = safe_exp(f)
|
||||||
|
return np.exp(f-ef)
|
||||||
|
|
||||||
def d2transf_df2(self,f):
|
def d2transf_df2(self,f):
|
||||||
ef = np.exp(f)
|
ef = safe_exp(f)
|
||||||
return -np.exp(f-ef)*(ef-1.)
|
return -np.exp(f-ef)*(ef-1.)
|
||||||
|
|
||||||
def d3transf_df3(self,f):
|
def d3transf_df3(self,f):
|
||||||
ef = np.exp(f)
|
ef = safe_exp(f)
|
||||||
return np.exp(f-ef)*(1.-3*ef + ef**2)
|
ef2 = safe_square(ef)
|
||||||
|
three_times_ef = safe_three_times(ef)
|
||||||
|
r_val = np.exp(f-ef)*(1.-three_times_ef + ef2)
|
||||||
|
return r_val
|
||||||
|
|
||||||
class Log(GPTransformation):
|
class Log(GPTransformation):
|
||||||
"""
|
"""
|
||||||
|
|
@ -123,16 +126,16 @@ class Log(GPTransformation):
|
||||||
|
|
||||||
"""
|
"""
|
||||||
def transf(self,f):
|
def transf(self,f):
|
||||||
return np.exp(np.clip(f, -_lim_val, _lim_val))
|
return safe_exp(f)
|
||||||
|
|
||||||
def dtransf_df(self,f):
|
def dtransf_df(self,f):
|
||||||
return np.exp(np.clip(f, -_lim_val, _lim_val))
|
return safe_exp(f)
|
||||||
|
|
||||||
def d2transf_df2(self,f):
|
def d2transf_df2(self,f):
|
||||||
return np.exp(np.clip(f, -_lim_val, _lim_val))
|
return safe_exp(f)
|
||||||
|
|
||||||
def d3transf_df3(self,f):
|
def d3transf_df3(self,f):
|
||||||
return np.exp(np.clip(f, -_lim_val, _lim_val))
|
return safe_exp(f)
|
||||||
|
|
||||||
class Log_ex_1(GPTransformation):
|
class Log_ex_1(GPTransformation):
|
||||||
"""
|
"""
|
||||||
|
|
@ -142,17 +145,20 @@ class Log_ex_1(GPTransformation):
|
||||||
|
|
||||||
"""
|
"""
|
||||||
def transf(self,f):
|
def transf(self,f):
|
||||||
return np.log(1.+np.exp(f))
|
return np.log1p(safe_exp(f))
|
||||||
|
|
||||||
def dtransf_df(self,f):
|
def dtransf_df(self,f):
|
||||||
return np.exp(f)/(1.+np.exp(f))
|
ef = safe_exp(f)
|
||||||
|
return ef/(1.+ef)
|
||||||
|
|
||||||
def d2transf_df2(self,f):
|
def d2transf_df2(self,f):
|
||||||
aux = np.exp(f)/(1.+np.exp(f))
|
ef = safe_exp(f)
|
||||||
|
aux = ef/(1.+ef)
|
||||||
return aux*(1.-aux)
|
return aux*(1.-aux)
|
||||||
|
|
||||||
def d3transf_df3(self,f):
|
def d3transf_df3(self,f):
|
||||||
aux = np.exp(f)/(1.+np.exp(f))
|
ef = safe_exp(f)
|
||||||
|
aux = ef/(1.+ef)
|
||||||
daux_df = aux*(1.-aux)
|
daux_df = aux*(1.-aux)
|
||||||
return daux_df - (2.*aux*daux_df)
|
return daux_df - (2.*aux*daux_df)
|
||||||
|
|
||||||
|
|
@ -160,14 +166,17 @@ class Reciprocal(GPTransformation):
|
||||||
def transf(self,f):
|
def transf(self,f):
|
||||||
return 1./f
|
return 1./f
|
||||||
|
|
||||||
def dtransf_df(self,f):
|
def dtransf_df(self, f):
|
||||||
return -1./(f**2)
|
f2 = safe_square(f)
|
||||||
|
return -1./f2
|
||||||
|
|
||||||
def d2transf_df2(self,f):
|
def d2transf_df2(self, f):
|
||||||
return 2./(f**3)
|
f3 = safe_cube(f)
|
||||||
|
return 2./f3
|
||||||
|
|
||||||
def d3transf_df3(self,f):
|
def d3transf_df3(self,f):
|
||||||
return -6./(f**4)
|
f4 = safe_quad(f)
|
||||||
|
return -6./f4
|
||||||
|
|
||||||
class Heaviside(GPTransformation):
|
class Heaviside(GPTransformation):
|
||||||
"""
|
"""
|
||||||
|
|
|
||||||
143
GPy/testing/link_function_tests.py
Normal file
143
GPy/testing/link_function_tests.py
Normal file
|
|
@ -0,0 +1,143 @@
|
||||||
|
import numpy as np
|
||||||
|
import scipy as sp
|
||||||
|
from scipy.special import cbrt
|
||||||
|
from GPy.models import GradientChecker
|
||||||
|
_lim_val = np.finfo(np.float64).max
|
||||||
|
_lim_val_exp = np.log(_lim_val)
|
||||||
|
_lim_val_square = np.sqrt(_lim_val)
|
||||||
|
_lim_val_cube = cbrt(_lim_val)
|
||||||
|
from GPy.likelihoods.link_functions import Identity, Probit, Cloglog, Log, Log_ex_1, Reciprocal, Heaviside
|
||||||
|
|
||||||
|
class LinkFunctionTests(np.testing.TestCase):
    """Gradient and overflow checks for the GP link functions.

    Each link function exposes ``transf`` and its first three derivatives
    (``dtransf_df``, ``d2transf_df2``, ``d3transf_df3``).  The tests verify
    that every derivative is consistent with the function it differentiates
    and that the transformations stay finite near the float64 limits.
    """

    def setUp(self):
        """Create the latent values at which the link functions are probed."""
        self.small_f = np.array([[-1e-4]])
        self.zero_f = np.array([[1e-4]])
        self.mid_f = np.array([[5.0]])
        self.large_f = np.array([[1e4]])
        self.f_lower_lim = np.array(-np.inf)
        self.f_upper_lim = np.array(np.inf)

    def _check_derivative_chain(self, link_func, x0):
        """Gradient-check transf and its first two derivatives at ``x0``.

        Every function in the chain transf -> dtransf_df -> d2transf_df2 is
        paired with the method that claims to be its derivative.
        """
        derivative_pairs = (
            (link_func.transf, link_func.dtransf_df),
            (link_func.dtransf_df, link_func.d2transf_df2),
            (link_func.d2transf_df2, link_func.d3transf_df3),
        )
        for func, dfunc in derivative_pairs:
            grad = GradientChecker(func, dfunc, x0=x0)
            self.assertTrue(grad.checkgrad(verbose=True))

    def check_gradient(self, link_func, lim_of_inf, test_lim=False):
        """Gradient-check ``link_func`` at several latent values.

        :param link_func: link function instance under test
        :param lim_of_inf: largest input the link function is expected to
            handle without overflowing
        :param test_lim: when True, additionally check just below
            ``lim_of_inf`` itself
        """
        for x0 in (self.mid_f, self.small_f, self.zero_f):
            self._check_derivative_chain(link_func, x0)

        # Do a limit test if the large f value is too large
        large_f = np.clip(self.large_f, -np.inf, lim_of_inf-1e-3)
        self._check_derivative_chain(link_func, large_f)

        if test_lim:
            # Function-call form is valid on both Python 2 and Python 3
            print("Testing limits")
            # Remove some otherwise we are too close to the limit for
            # gradcheck to work effectively
            lim_of_inf = lim_of_inf - 1e-4
            self._check_derivative_chain(link_func, lim_of_inf)

    def check_overflow(self, link_func, lim_of_inf):
        """Check transf and its first two derivatives beyond ``lim_of_inf``.

        Note this is not checking the value is correct, just that it is
        neither inf nor nan.
        """
        beyond_lim_of_inf = lim_of_inf + 100.0
        probed = (link_func.transf, link_func.dtransf_df, link_func.d2transf_df2)

        for func in probed:
            self.assertFalse(np.isinf(func(beyond_lim_of_inf)))

        for func in probed:
            self.assertFalse(np.isnan(func(beyond_lim_of_inf)))

    def test_log_overflow(self):
        """Log link: transf matches exp and is clipped at the float64 limit."""
        link = Log()
        lim_of_inf = _lim_val_exp

        np.testing.assert_almost_equal(np.exp(self.mid_f), link.transf(self.mid_f))
        self.assertTrue(np.isinf(np.exp(np.log(self.f_upper_lim))))
        # Check the clipping works
        np.testing.assert_almost_equal(link.transf(self.f_lower_lim), 0, decimal=5)
        # Need to look at most significant figures here rather than the decimals
        np.testing.assert_approx_equal(link.transf(self.f_upper_lim), _lim_val, significant=5)
        self.check_overflow(link, lim_of_inf)

        # Check that it would otherwise fail.  errstate restores the previous
        # floating-point error settings even when the assertion fails.
        beyond_lim_of_inf = lim_of_inf + 10.0
        with np.errstate(over='ignore'):
            self.assertTrue(np.isinf(np.exp(beyond_lim_of_inf)))

    def test_log_ex_1_overflow(self):
        """Log_ex_1 link: transf matches log1p(exp(f)) and is clipped."""
        link = Log_ex_1()
        lim_of_inf = _lim_val_exp

        np.testing.assert_almost_equal(np.log1p(np.exp(self.mid_f)), link.transf(self.mid_f))
        self.assertTrue(np.isinf(np.log1p(np.exp(np.log(self.f_upper_lim)))))
        # Check the clipping works
        np.testing.assert_almost_equal(link.transf(self.f_lower_lim), 0, decimal=5)
        # Need to look at most significant figures here rather than the decimals
        np.testing.assert_approx_equal(link.transf(self.f_upper_lim), np.log1p(_lim_val), significant=5)
        self.check_overflow(link, lim_of_inf)

        # Check that it would otherwise fail.  errstate restores the previous
        # floating-point error settings even when the assertion fails.
        beyond_lim_of_inf = lim_of_inf + 10.0
        with np.errstate(over='ignore'):
            self.assertTrue(np.isinf(np.log1p(np.exp(beyond_lim_of_inf))))

    def test_log_gradients(self):
        """Gradient-check the Log link, including just below its limit."""
        # transf dtransf_df d2transf_df2 d3transf_df3
        link = Log()
        lim_of_inf = _lim_val_exp
        self.check_gradient(link, lim_of_inf, test_lim=True)

    def test_identity_gradients(self):
        """Gradient-check the Identity link (limit test disabled)."""
        link = Identity()
        lim_of_inf = _lim_val
        # FIXME: Should be able to think of a way to test the limits of this
        self.check_gradient(link, lim_of_inf, test_lim=False)

    def test_probit_gradients(self):
        """Gradient-check the Probit link, including just below its limit."""
        link = Probit()
        lim_of_inf = _lim_val
        self.check_gradient(link, lim_of_inf, test_lim=True)

    def test_Cloglog_gradients(self):
        """Gradient-check the Cloglog link, including just below its limit."""
        link = Cloglog()
        lim_of_inf = _lim_val_exp
        self.check_gradient(link, lim_of_inf, test_lim=True)

    def test_Log_ex_1_gradients(self):
        """Gradient-check the Log_ex_1 link and probe it beyond its limit."""
        link = Log_ex_1()
        lim_of_inf = _lim_val_exp
        self.check_gradient(link, lim_of_inf, test_lim=True)
        self.check_overflow(link, lim_of_inf)

    def test_reciprocal_gradients(self):
        """Gradient-check the Reciprocal link, including just below its limit."""
        link = Reciprocal()
        lim_of_inf = _lim_val
        # Does not work with much smaller values, and values closer to zero than 1e-5
        self.check_gradient(link, lim_of_inf, test_lim=True)
|
||||||
|
|
@ -6,15 +6,33 @@ from scipy.special import cbrt
|
||||||
from .config import *
|
from .config import *
|
||||||
|
|
||||||
_lim_val = np.finfo(np.float64).max
|
_lim_val = np.finfo(np.float64).max
|
||||||
|
|
||||||
_lim_val_exp = np.log(_lim_val)
|
_lim_val_exp = np.log(_lim_val)
|
||||||
_lim_val_square = np.sqrt(_lim_val)
|
_lim_val_square = np.sqrt(_lim_val)
|
||||||
_lim_val_cube = cbrt(_lim_val)
|
#_lim_val_cube = cbrt(_lim_val)
|
||||||
|
_lim_val_cube = np.nextafter(_lim_val**(1/3.0), -np.inf)
|
||||||
|
_lim_val_quad = np.nextafter(_lim_val**(1/4.0), -np.inf)
|
||||||
|
_lim_val_three_times = np.nextafter(_lim_val/3.0, -np.inf)
|
||||||
|
|
||||||
def safe_exp(f):
|
def safe_exp(f):
|
||||||
clip_f = np.clip(f, -np.inf, _lim_val_exp)
|
clip_f = np.clip(f, -np.inf, _lim_val_exp)
|
||||||
return np.exp(clip_f)
|
return np.exp(clip_f)
|
||||||
|
|
||||||
|
def safe_square(f):
    """Return ``f**2`` without overflowing to inf.

    ``f`` is first clipped to ``[-_lim_val_square, _lim_val_square]``, where
    ``_lim_val_square`` is the square root of the largest float64.  The clip
    is symmetric because squaring discards the sign: a large-magnitude
    negative input overflows exactly like a large positive one.

    :param f: scalar or array of values to square
    :returns: the squared (clipped) values, same shape as ``f``
    """
    f = np.clip(f, -_lim_val_square, _lim_val_square)
    return f**2
|
||||||
|
|
||||||
|
def safe_cube(f):
    """Return ``f**3`` without overflowing to +/-inf.

    ``f`` is first clipped to ``[-_lim_val_cube, _lim_val_cube]``, where
    ``_lim_val_cube`` lies just below the cube root of the largest float64.
    Cubing preserves the sign, so the lower bound is needed to stop
    large-magnitude negative inputs from overflowing to -inf.

    :param f: scalar or array of values to cube
    :returns: the cubed (clipped) values, same shape as ``f``
    """
    f = np.clip(f, -_lim_val_cube, _lim_val_cube)
    return f**3
|
||||||
|
|
||||||
|
def safe_quad(f):
    """Return ``f**4`` without overflowing to inf.

    ``f`` is first clipped to ``[-_lim_val_quad, _lim_val_quad]``, where
    ``_lim_val_quad`` lies just below the fourth root of the largest float64.
    The clip is symmetric because an even power discards the sign: a
    large-magnitude negative input overflows exactly like a positive one.

    :param f: scalar or array of values to raise to the fourth power
    :returns: the (clipped) values to the fourth power, same shape as ``f``
    """
    f = np.clip(f, -_lim_val_quad, _lim_val_quad)
    return f**4
|
||||||
|
|
||||||
|
def safe_three_times(f):
    """Return ``3*f`` without overflowing to +/-inf.

    ``f`` is first clipped to
    ``[-_lim_val_three_times, _lim_val_three_times]``, where
    ``_lim_val_three_times`` lies just below a third of the largest float64.
    The lower bound stops large-magnitude negative inputs from overflowing
    to -inf.

    :param f: scalar or array of values to triple
    :returns: three times the (clipped) values, same shape as ``f``
    """
    f = np.clip(f, -_lim_val_three_times, _lim_val_three_times)
    return 3*f
|
||||||
|
|
||||||
def chain_1(df_dg, dg_dx):
|
def chain_1(df_dg, dg_dx):
|
||||||
"""
|
"""
|
||||||
Generic chaining function for first derivative
|
Generic chaining function for first derivative
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue