All gradients now gradcheck

This commit is contained in:
Alan Saul 2013-09-12 15:08:02 +01:00
parent 6e405319b0
commit e36ffcba6e
2 changed files with 82 additions and 77 deletions

View file

@@ -291,6 +291,7 @@ class StudentT(LikelihoodFunction):
""" """
assert y.shape == f.shape assert y.shape == f.shape
e = y - f e = y - f
#FIXME: OUT BY SOME FUNCTION OF N
dlik_dvar = self.v*(e**2 - self.sigma2)/(2*self.sigma2*(self.sigma2*self.v + e**2)) dlik_dvar = self.v*(e**2 - self.sigma2)/(2*self.sigma2*(self.sigma2*self.v + e**2))
return dlik_dvar return dlik_dvar
@@ -442,7 +443,7 @@ class Gaussian(LikelihoodFunction):
self.I = np.eye(self.N) self.I = np.eye(self.N)
self.covariance_matrix = self.I * self._variance self.covariance_matrix = self.I * self._variance
self.Ki = self.I*(1.0 / self._variance) self.Ki = self.I*(1.0 / self._variance)
self.ln_K = np.trace(self.covariance_matrix) self.ln_det_K = np.sum(np.log(np.diag(self.covariance_matrix)))
def link_function(self, y, f, extra_data=None): def link_function(self, y, f, extra_data=None):
"""link_function $\ln p(y|f)$ """link_function $\ln p(y|f)$
@@ -458,11 +459,11 @@ class Gaussian(LikelihoodFunction):
e = y - f e = y - f
eeT = np.dot(e, e.T) eeT = np.dot(e, e.T)
objective = (- 0.5*self.D*np.log(2*np.pi) objective = (- 0.5*self.D*np.log(2*np.pi)
- 0.5*self.ln_K - 0.5*self.ln_det_K
#- 0.5*np.sum(np.multiply(self.Ki, eeT)) #- 0.5*np.dot(np.dot(e.T, self.Ki), e)
- 0.5*np.dot(np.dot(e.T, self.Ki), e) - (0.5/self._variance)*np.dot(e.T, e) # As long as K is diagonal
) )
return np.sum(objective) # FIXME: put this back! return np.sum(objective)
def dlik_df(self, y, f, extra_data=None): def dlik_df(self, y, f, extra_data=None):
""" """
@@ -514,7 +515,8 @@ class Gaussian(LikelihoodFunction):
assert y.shape == f.shape assert y.shape == f.shape
e = y - f e = y - f
s_4 = 1.0/(self._variance**2) s_4 = 1.0/(self._variance**2)
dlik_dsigma = -0.5*self.N/self._variance + 0.5*s_4*np.trace(np.dot(e.T, np.dot(self.I, e))) dlik_dsigma = -0.5*self.N/self._variance + 0.5*s_4*np.dot(e.T, e)
#dlik_dsigma = -0.5*self.N + 0.5*s_4*np.dot(e.T, e)
return dlik_dsigma return dlik_dsigma
def dlik_df_dvar(self, y, f, extra_data=None): def dlik_df_dvar(self, y, f, extra_data=None):
@@ -523,7 +525,7 @@ class Gaussian(LikelihoodFunction):
""" """
assert y.shape == f.shape assert y.shape == f.shape
s_4 = 1.0/(self._variance**2) s_4 = 1.0/(self._variance**2)
dlik_grad_dsigma = -np.dot(s_4, np.dot(self.I, y)) + 0.5*np.dot(s_4, np.dot(self.I, f)) dlik_grad_dsigma = -np.dot(s_4*self.I, y) + np.dot(s_4*self.I, f)
return dlik_grad_dsigma return dlik_grad_dsigma
def d2lik_d2f_dvar(self, y, f, extra_data=None): def d2lik_d2f_dvar(self, y, f, extra_data=None):
@@ -533,7 +535,7 @@ class Gaussian(LikelihoodFunction):
$$\frac{d}{d\sigma}(\frac{d^{2}p(y_{i}|f_{i})}{d^{2}f}) = \frac{2\sigma v(v + 1)(\sigma^2 v - 3(y-f)^2)}{((y-f)^2 + \sigma^2 v)^3}$$ $$\frac{d}{d\sigma}(\frac{d^{2}p(y_{i}|f_{i})}{d^{2}f}) = \frac{2\sigma v(v + 1)(\sigma^2 v - 3(y-f)^2)}{((y-f)^2 + \sigma^2 v)^3}$$
""" """
assert y.shape == f.shape assert y.shape == f.shape
dlik_hess_dsigma = 0.5*np.diag((1.0/(self._variance**2))*self.I)[:, None] dlik_hess_dsigma = np.diag((1.0/(self._variance**2))*self.I)[:, None]
return dlik_hess_dsigma return dlik_hess_dsigma
def _gradients(self, y, f, extra_data=None): def _gradients(self, y, f, extra_data=None):

View file

@@ -3,6 +3,7 @@ import unittest
import GPy import GPy
from GPy.models import GradientChecker from GPy.models import GradientChecker
import functools import functools
import inspect
def dparam_partial(inst_func, *args): def dparam_partial(inst_func, *args):
""" """
@@ -22,66 +23,71 @@ def dparam_partial(inst_func, *args):
return inst_func(*args) return inst_func(*args)
return functools.partial(param_func, inst_func=inst_func, args=args) return functools.partial(param_func, inst_func=inst_func, args=args)
def grad_checker_wrt_params(func, dfunc, params, args, randomize=False, verbose=False): def dparam_checkgrad(func, dfunc, params, args, constrain_positive=True, randomize=False, verbose=False):
""" """
checkgrad expects a f: R^N -> R^1 and df: R^N -> R^N checkgrad expects a f: R^N -> R^1 and df: R^N -> R^N
However if we are holding other parameters fixed and moving something else However if we are holding other parameters fixed and moving something else
We need to check the gradient of each of the fixed parameters (f and y for example) separately We need to check the gradient of each of the fixed parameters
Whilst moving another parameter. otherwise f: gives back R^N and df: gives back R^NxM where M is (f and y for example) separately.
Whilst moving another parameter. otherwise f: gives back R^N and
df: gives back R^NxM where M is
The number of parameters and N is the number of data The number of parameters and N is the number of data
Need to take a slice out from f and a slice out of df Need to take a slice out from f and a slice out of df
""" """
print "{} likelihood: {} vs {}".format(func.im_self.__class__.__name__, #print "\n{} likelihood: {} vs {}".format(func.im_self.__class__.__name__,
func.__name__, dfunc.__name__) #func.__name__, dfunc.__name__)
partial_f = dparam_partial(func, *args) partial_f = dparam_partial(func, *args)
partial_df = dparam_partial(dfunc, *args) partial_df = dparam_partial(dfunc, *args)
gradchecked = False gradchecking = True
for param in params: for param in params:
fnum = np.atleast_1d(partial_f(param)).shape[0] fnum = np.atleast_1d(partial_f(param)).shape[0]
dfnum = np.atleast_1d(partial_df(param)).shape[0] dfnum = np.atleast_1d(partial_df(param)).shape[0]
for fixed_val in range(dfnum): for fixed_val in range(dfnum):
f_ind = min(fnum, fixed_val+1) - 1 #dlik and dlik_dvar gives back 1 value for each #dlik and dlik_dvar gives back 1 value for each
f_ind = min(fnum, fixed_val+1) - 1
grad = GradientChecker(lambda x: np.atleast_1d(partial_f(x))[f_ind], grad = GradientChecker(lambda x: np.atleast_1d(partial_f(x))[f_ind],
lambda x : np.atleast_1d(partial_df(x))[fixed_val], lambda x : np.atleast_1d(partial_df(x))[fixed_val],
param, 'p') param, 'p')
grad.constrain_positive('p') if constrain_positive:
grad.constrain_positive('p')
if randomize: if randomize:
grad.randomize() grad.randomize()
print grad
if verbose: if verbose:
grad.checkgrad(verbose=1) grad.checkgrad(verbose=1)
cg = grad.checkgrad() if not grad.checkgrad():
print cg gradchecking = False
if cg:
print "True" return gradchecking
gradchecked = True
else:
print "False"
return False
print str(gradchecked)
return gradchecked
class LaplaceTests(unittest.TestCase): class LaplaceTests(unittest.TestCase):
def setUp(self): def setUp(self):
self.N = 5 self.N = 1
self.D = 1 self.D = 5
self.X = np.linspace(0, 1, self.N)[:, None] self.X = np.linspace(0, 1, self.N)[:, None]
self.real_std = 0.2 self.real_std = 0.2
noise = np.random.randn(*self.X.shape)*self.real_std noise = np.random.randn(*self.X.shape)*self.real_std
self.Y = np.sin(self.X*2*np.pi) + noise self.Y = np.sin(self.X*2*np.pi) + noise
#self.Y = np.array([[1.0]])#np.sin(self.X*2*np.pi) + noise
self.f = np.random.rand(self.N, 1) self.f = np.random.rand(self.N, 1)
#self.f = np.array([[3.0]])#np.sin(self.X*2*np.pi) + noise
self.var = 0.1 self.var = np.random.rand(1)
self.stu_t = GPy.likelihoods.functions.StudentT(deg_free=5, sigma2=self.var) self.stu_t = GPy.likelihoods.functions.StudentT(deg_free=5, sigma2=self.var)
self.gauss = GPy.likelihoods.functions.Gaussian(self.var, self.D, self.N) self.gauss = GPy.likelihoods.functions.Gaussian(self.var, self.D, self.N)
def tearDown(self): def tearDown(self):
self.stu_t = None self.stu_t = None
self.gauss = None self.gauss = None
self.Y = None
self.f = None
self.X = None
def test_gaussian_dlik_df(self): def test_gaussian_dlik_df(self):
print "\n{}".format(inspect.stack()[0][3])
link = functools.partial(self.gauss.link_function, self.Y) link = functools.partial(self.gauss.link_function, self.Y)
dlik_df = functools.partial(self.gauss.dlik_df, self.Y) dlik_df = functools.partial(self.gauss.dlik_df, self.Y)
grad = GradientChecker(link, dlik_df, self.f.copy(), 'f') grad = GradientChecker(link, dlik_df, self.f.copy(), 'f')
@@ -90,6 +96,7 @@ class LaplaceTests(unittest.TestCase):
self.assertTrue(grad.checkgrad()) self.assertTrue(grad.checkgrad())
def test_gaussian_d2lik_d2f(self): def test_gaussian_d2lik_d2f(self):
print "\n{}".format(inspect.stack()[0][3])
dlik_df = functools.partial(self.gauss.dlik_df, self.Y) dlik_df = functools.partial(self.gauss.dlik_df, self.Y)
d2lik_d2f = functools.partial(self.gauss.d2lik_d2f, self.Y) d2lik_d2f = functools.partial(self.gauss.d2lik_d2f, self.Y)
grad = GradientChecker(dlik_df, d2lik_d2f, self.f.copy(), 'f') grad = GradientChecker(dlik_df, d2lik_d2f, self.f.copy(), 'f')
@@ -98,6 +105,7 @@ class LaplaceTests(unittest.TestCase):
self.assertTrue(grad.checkgrad()) self.assertTrue(grad.checkgrad())
def test_gaussian_d3lik_d3f(self): def test_gaussian_d3lik_d3f(self):
print "\n{}".format(inspect.stack()[0][3])
d2lik_d2f = functools.partial(self.gauss.d2lik_d2f, self.Y) d2lik_d2f = functools.partial(self.gauss.d2lik_d2f, self.Y)
d3lik_d3f = functools.partial(self.gauss.d3lik_d3f, self.Y) d3lik_d3f = functools.partial(self.gauss.d3lik_d3f, self.Y)
grad = GradientChecker(d2lik_d2f, d3lik_d3f, self.f.copy(), 'f') grad = GradientChecker(d2lik_d2f, d3lik_d3f, self.f.copy(), 'f')
@@ -106,28 +114,31 @@ class LaplaceTests(unittest.TestCase):
self.assertTrue(grad.checkgrad()) self.assertTrue(grad.checkgrad())
def test_gaussian_dlik_dvar(self): def test_gaussian_dlik_dvar(self):
#link = dparam_partial(self.gauss.link_function, self.Y, self.f) print "\n{}".format(inspect.stack()[0][3])
#dlik_dvar = dparam_partial(self.gauss.dlik_dvar, self.Y, self.f) self.assertTrue(
#grad = GradientChecker(link, dlik_dvar, self.var, 'v') dparam_checkgrad(self.gauss.link_function, self.gauss.dlik_dvar,
#grad.constrain_positive('v') [self.var], args=(self.Y, self.f), constrain_positive=True,
#grad.randomize() randomize=False, verbose=True)
#grad.checkgrad(verbose=1) )
#self.assertTrue(grad.checkgrad())
self.assertTrue(grad_checker_wrt_params(self.gauss.link_function, self.gauss.dlik_dvar,
[self.var], args=(self.Y, self.f), randomize=True, verbose=True))
def test_gaussian_dlik_df_dvar(self): def test_gaussian_dlik_df_dvar(self):
#dlik_df = dparam_partial(self.gauss.dlik_df, self.Y, self.f) print "\n{}".format(inspect.stack()[0][3])
#dlik_df_dvar = dparam_partial(self.gauss.dlik_df_dvar, self.Y, self.f) self.assertTrue(
#grad = GradientChecker(dlik_df, dlik_df_dvar, self.var, 'v') dparam_checkgrad(self.gauss.dlik_df, self.gauss.dlik_df_dvar,
#grad.constrain_positive('v') [self.var], args=(self.Y.copy(), self.f.copy()), constrain_positive=True,
#grad.randomize() randomize=False, verbose=True)
#grad.checkgrad(verbose=1) )
#self.assertTrue(grad.checkgrad())
self.assertTrue(grad_checker_wrt_params(self.gauss.dlik_df, self.gauss.dlik_df_dvar, def test_gaussian_d2lik_d2f_dvar(self):
[self.var], args=(self.Y, self.f), randomize=True, verbose=True)) print "\n{}".format(inspect.stack()[0][3])
self.assertTrue(
dparam_checkgrad(self.gauss.d2lik_d2f, self.gauss.d2lik_d2f_dvar,
[self.var], args=(self.Y, self.f), constrain_positive=True,
randomize=True, verbose=True)
)
def test_studentt_dlik_df(self): def test_studentt_dlik_df(self):
print "\n{}".format(inspect.stack()[0][3])
link = functools.partial(self.stu_t.link_function, self.Y) link = functools.partial(self.stu_t.link_function, self.Y)
dlik_df = functools.partial(self.stu_t.dlik_df, self.Y) dlik_df = functools.partial(self.stu_t.dlik_df, self.Y)
grad = GradientChecker(link, dlik_df, self.f.copy(), 'f') grad = GradientChecker(link, dlik_df, self.f.copy(), 'f')
@@ -135,6 +146,7 @@ class LaplaceTests(unittest.TestCase):
grad.checkgrad(verbose=1) grad.checkgrad(verbose=1)
def test_studentt_d2lik_d2f(self): def test_studentt_d2lik_d2f(self):
print "\n{}".format(inspect.stack()[0][3])
dlik_df = functools.partial(self.stu_t.dlik_df, self.Y) dlik_df = functools.partial(self.stu_t.dlik_df, self.Y)
d2lik_d2f = functools.partial(self.stu_t.d2lik_d2f, self.Y) d2lik_d2f = functools.partial(self.stu_t.d2lik_d2f, self.Y)
grad = GradientChecker(dlik_df, d2lik_d2f, self.f.copy(), 'f') grad = GradientChecker(dlik_df, d2lik_d2f, self.f.copy(), 'f')
@@ -142,6 +154,7 @@ class LaplaceTests(unittest.TestCase):
grad.checkgrad(verbose=1) grad.checkgrad(verbose=1)
def test_studentt_d3lik_d3f(self): def test_studentt_d3lik_d3f(self):
print "\n{}".format(inspect.stack()[0][3])
d2lik_d2f = functools.partial(self.stu_t.d2lik_d2f, self.Y) d2lik_d2f = functools.partial(self.stu_t.d2lik_d2f, self.Y)
d3lik_d3f = functools.partial(self.stu_t.d3lik_d3f, self.Y) d3lik_d3f = functools.partial(self.stu_t.d3lik_d3f, self.Y)
grad = GradientChecker(d2lik_d2f, d3lik_d3f, self.f.copy(), 'f') grad = GradientChecker(d2lik_d2f, d3lik_d3f, self.f.copy(), 'f')
@@ -149,39 +162,29 @@ class LaplaceTests(unittest.TestCase):
grad.checkgrad(verbose=1) grad.checkgrad(verbose=1)
def test_studentt_dlik_dvar(self): def test_studentt_dlik_dvar(self):
#link = dparam_partial(self.stu_t.link_function, self.Y, self.f) print "\n{}".format(inspect.stack()[0][3])
#dlik_dvar = dparam_partial(self.stu_t.dlik_dvar, self.Y, self.f) self.assertTrue(
#grad = GradientChecker(link, dlik_dvar, self.var, 'v') dparam_checkgrad(self.stu_t.link_function, self.stu_t.dlik_dvar,
#grad.constrain_positive('v') [self.var], args=(self.Y.copy(), self.f.copy()),
#grad.randomize() constrain_positive=True, randomize=True, verbose=True)
#grad.checkgrad(verbose=1) )
#self.assertTrue(grad.checkgrad())
self.assertTrue(grad_checker_wrt_params(self.stu_t.link_function, self.stu_t.dlik_dvar,
[self.var], args=(self.Y.copy(), self.f.copy()), randomize=True, verbose=True))
def test_studentt_dlik_df_dvar(self): def test_studentt_dlik_df_dvar(self):
#dlik_df = dparam_partial(self.stu_t.dlik_df, self.Y, self.f) print "\n{}".format(inspect.stack()[0][3])
#dlik_df_dvar = dparam_partial(self.stu_t.dlik_df_dvar, self.Y, self.f) self.assertTrue(
#grad = GradientChecker(dlik_df, dlik_df_dvar, self.var, 'v') dparam_checkgrad(self.stu_t.dlik_df, self.stu_t.dlik_df_dvar,
#grad.constrain_positive('v') [self.var], args=(self.Y.copy(), self.f.copy()),
#grad.randomize() constrain_positive=True, randomize=True, verbose=True)
#grad.checkgrad(verbose=1) )
#self.assertTrue(grad.checkgrad())
self.assertTrue(grad_checker_wrt_params(self.stu_t.dlik_df, self.stu_t.dlik_df_dvar, def test_studentt_d2lik_d2f_dvar(self):
[self.var], args=(self.Y.copy(), self.f.copy()), randomize=True, verbose=True)) print "\n{}".format(inspect.stack()[0][3])
self.assertTrue(
dparam_checkgrad(self.stu_t.d2lik_d2f, self.stu_t.d2lik_d2f_dvar,
[self.var], args=(self.Y.copy(), self.f.copy()),
constrain_positive=True, randomize=True, verbose=True)
)
if __name__ == "__main__": if __name__ == "__main__":
#N = 5
#D = 1
#X = np.linspace(0, 1, N)[:, None]
#real_std = 0.2
#noise = np.random.randn(*X.shape)*real_std
#Y = np.sin(X*2*np.pi) + noise
#f = np.random.rand(N, 1)
#var = 0.1
#stu_t = GPy.likelihoods.functions.StudentT(deg_free=5, sigma2=var)
#print grad_checker_wrt_params(stu_t.dlik_df, stu_t.dlik_df_dvar, [var], args=(Y, f), randomize=True, verbose=False)
print "Running unit tests" print "Running unit tests"
unittest.main() unittest.main()