Added more options to generic tests (constraining link function

values as bernoulli requies R^{0,1}) and implemented new gradients
for bernoulli
This commit is contained in:
Alan Saul 2013-10-17 17:44:08 +01:00
parent f3fd9f1325
commit 1848653fce
4 changed files with 285 additions and 121 deletions

View file

@ -93,6 +93,110 @@ class Bernoulli(NoiseDistribution):
p = self.gp_link.transf(gp) p = self.gp_link.transf(gp)
return (obs/p + (1.-obs)/(1.-p))*self.gp_link.d2transf_df2(gp) + ((1.-obs)/(1.-p)**2-obs/p**2)*self.gp_link.dtransf_df(gp) return (obs/p + (1.-obs)/(1.-p))*self.gp_link.d2transf_df2(gp) + ((1.-obs)/(1.-p)**2-obs/p**2)*self.gp_link.dtransf_df(gp)
def pdf_link(self, link_f, y, extra_data=None):
"""
Likelihood function given link(f)
.. math::
\\p(y_{i}|\\lambda(f_{i})) = \\lambda(f_{i})^{y_{i}}(1-f_{i})^{1-y_{i}}
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data not used in bernoulli
:returns: likelihood evaluated for this point
:rtype: float
.. Note:
Each y_{i} must be in {0,1}
"""
assert np.asarray(link_f).shape == np.asarray(y).shape
objective = (link_f**y) * ((1.-link_f)**(1.-y))
return np.exp(np.sum(np.log(objective)))
def logpdf_link(self, link_f, y, extra_data=None):
"""
Log Likelihood function given link(f)
.. math::
\\ln p(y_{i}|\\lambda(f_{i})) = y_{i}\\log\\lambda(f_{i}) + (1-y_{i})\\log (1-f_{i})
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data not used in bernoulli
:returns: log likelihood evaluated for this point
:rtype: float
"""
assert np.asarray(link_f).shape == np.asarray(y).shape
objective = np.log(link_f**y) + np.log((1.-link_f)**(1.-y))
return np.sum(objective)
def dlogpdf_dlink(self, link_f, y, extra_data=None):
"""
Gradient of the pdf at y, given link(f) w.r.t link(f)
.. math::
\\frac{d\\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)} = \\frac{y_{i}}{\\lambda(f_{i})} - \\frac{(1 - y_{i})}{(1 - \\lambda(f_{i}))}
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data not used in gaussian
:returns: gradient of log likelihood evaluated at points
:rtype: Nx1 array
"""
assert np.asarray(link_f).shape == np.asarray(y).shape
grad = (y/link_f) - (1.-y)/(1-link_f)
return grad
def d2logpdf_dlink2(self, link_f, y, extra_data=None):
"""
Hessian at y, given link_f, w.r.t link_f the hessian will be 0 unless i == j
i.e. second derivative logpdf at y given link(f_i) link(f_j) w.r.t link(f_i) and link(f_j)
.. math::
\\frac{d^{2}\\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)^{2}} = \\frac{-y_{i}}{\\lambda(f)^{2}} - \\frac{(1-y_{i})}{(1-\\lambda(f))^{2}}
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data not used in gaussian
:returns: Diagonal of log hessian matrix (second derivative of log likelihood evaluated at points link(f))
:rtype: Nx1 array
.. Note::
Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases
(the distribution for y_i depends only on link(f_i) not on link(f_(j!=i))
"""
assert np.asarray(link_f).shape == np.asarray(y).shape
d2logpdf_dlink2 = -y/(link_f**2) - (1-y)/((1-link_f)**2)
return d2logpdf_dlink2
def d3logpdf_dlink3(self, link_f, y, extra_data=None):
"""
Third order derivative log-likelihood function at y given link(f) w.r.t link(f)
.. math::
\\frac{d^{3} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{3}\\lambda(f)} = \\frac{2y_{i}}{\\lambda(f)^{3}} - \\frac{2(1-y_{i}}{(1-\\lambda(f))^{3}}
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data not used in gaussian
:returns: third derivative of log likelihood evaluated at points link(f)
:rtype: Nx1 array
"""
assert np.asarray(link_f).shape == np.asarray(y).shape
d3logpdf_dlink3 = 2*(y/(link_f**3) - (1-y)/((1-link_f)**3))
return d3logpdf_dlink3
def _mean(self,gp): def _mean(self,gp):
""" """
Mass (or density) function Mass (or density) function

View file

@ -102,7 +102,7 @@ class Gaussian(NoiseDistribution):
:type link_f: Nx1 array :type link_f: Nx1 array
:param y: data :param y: data
:type y: Nx1 array :type y: Nx1 array
:param extra_data: extra_data which is not used in student t distribution - not used :param extra_data: extra_data not used in gaussian
:returns: likelihood evaluated for this point :returns: likelihood evaluated for this point
:rtype: float :rtype: float
""" """
@ -121,11 +121,11 @@ class Gaussian(NoiseDistribution):
:type link_f: Nx1 array :type link_f: Nx1 array
:param y: data :param y: data
:type y: Nx1 array :type y: Nx1 array
:param extra_data: extra_data which is not used in student t distribution - not used :param extra_data: extra_data not used in gaussian
:returns: log likelihood evaluated for this point :returns: log likelihood evaluated for this point
:rtype: float :rtype: float
""" """
assert link_f.shape == y.shape assert np.asarray(link_f).shape == np.asarray(y).shape
return -0.5*(np.sum((y-link_f)**2/self.variance) + self.ln_det_K + self.N*np.log(2.*np.pi)) return -0.5*(np.sum((y-link_f)**2/self.variance) + self.ln_det_K + self.N*np.log(2.*np.pi))
def dlogpdf_dlink(self, link_f, y, extra_data=None): def dlogpdf_dlink(self, link_f, y, extra_data=None):
@ -133,17 +133,17 @@ class Gaussian(NoiseDistribution):
Gradient of the pdf at y, given link(f) w.r.t link(f) Gradient of the pdf at y, given link(f) w.r.t link(f)
.. math:: .. math::
\\frac{d \\ln p(y_{i}|f_{i})}{df} = \\frac{1}{\\sigma^{2}}(y_{i} - f_{i}) \\frac{d \\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)} = \\frac{1}{\\sigma^{2}}(y_{i} - \\lambda(f_{i}))
:param link_f: latent variables link(f) :param link_f: latent variables link(f)
:type link_f: Nx1 array :type link_f: Nx1 array
:param y: data :param y: data
:type y: Nx1 array :type y: Nx1 array
:param extra_data: extra_data which is not used in student t distribution - not used :param extra_data: extra_data not used in gaussian
:returns: gradient of log likelihood evaluated at points :returns: gradient of log likelihood evaluated at points link(f)
:rtype: Nx1 array :rtype: Nx1 array
""" """
assert link_f.shape == y.shape assert np.asarray(link_f).shape == np.asarray(y).shape
s2_i = (1.0/self.variance) s2_i = (1.0/self.variance)
grad = s2_i*y - s2_i*link_f grad = s2_i*y - s2_i*link_f
return grad return grad
@ -151,24 +151,24 @@ class Gaussian(NoiseDistribution):
def d2logpdf_dlink2(self, link_f, y, extra_data=None): def d2logpdf_dlink2(self, link_f, y, extra_data=None):
""" """
Hessian at y, given link_f, w.r.t link_f the hessian will be 0 unless i == j Hessian at y, given link_f, w.r.t link_f the hessian will be 0 unless i == j
i.e. second derivative _nlog_mass at y given f_{i} f_{j} w.r.t f_{i} and f_{j} i.e. second derivative logpdf at y given link(f_i) link(f_j) w.r.t link(f_i) and link(f_j)
.. math:: .. math::
\\frac{d^{2} \\ln p(y_{i}|f_{i})}{d^{2}f} = -\\frac{1}{\\sigma^{2}} \\frac{d^{2} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{2}f} = -\\frac{1}{\\sigma^{2}}
:param link_f: latent variables link(f) :param link_f: latent variables link(f)
:type link_f: Nx1 array :type link_f: Nx1 array
:param y: data :param y: data
:type y: Nx1 array :type y: Nx1 array
:param extra_data: extra_data which is not used in student t distribution - not used :param extra_data: extra_data not used in gaussian
:returns: Diagonal of log hessian matrix (second derivative of log likelihood evaluated at points f) :returns: Diagonal of log hessian matrix (second derivative of log likelihood evaluated at points link(f))
:rtype: Nx1 array :rtype: Nx1 array
.. Note:: .. Note::
Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases
(the distribution for y_{i} depends only on f_{i} not on f_{j!=i} (the distribution for y_i depends only on link(f_i) not on link(f_(j!=i))
""" """
assert link_f.shape == y.shape assert np.asarray(link_f).shape == np.asarray(y).shape
hess = -(1.0/self.variance)*np.ones((self.N, 1)) hess = -(1.0/self.variance)*np.ones((self.N, 1))
return hess return hess
@ -177,18 +177,18 @@ class Gaussian(NoiseDistribution):
Third order derivative log-likelihood function at y given link(f) w.r.t link(f) Third order derivative log-likelihood function at y given link(f) w.r.t link(f)
.. math:: .. math::
\\frac{d^{3} \\ln p(y_{i}|f_{i})}{d^{3}f} = 0 \\frac{d^{3} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{3}\\lambda(f)} = 0
:param link_f: latent variables link(f) :param link_f: latent variables link(f)
:type link_f: Nx1 array :type link_f: Nx1 array
:param y: data :param y: data
:type y: Nx1 array :type y: Nx1 array
:param extra_data: extra_data which is not used in student t distribution - not used :param extra_data: extra_data not used in gaussian
:returns: third derivative of log likelihood evaluated at points f :returns: third derivative of log likelihood evaluated at points link(f)
:rtype: Nx1 array :rtype: Nx1 array
""" """
assert link_f.shape == y.shape assert np.asarray(link_f).shape == np.asarray(y).shape
d3logpdf_dlink3 = np.diagonal(0*self.I)[:, None] # FIXME: CAREFUL THIS MAY NOT WORK WITH MULTIDIMENSIONS? d3logpdf_dlink3 = np.diagonal(0*self.I)[:, None]
return d3logpdf_dlink3 return d3logpdf_dlink3
def dlogpdf_link_dvar(self, link_f, y, extra_data=None): def dlogpdf_link_dvar(self, link_f, y, extra_data=None):
@ -196,17 +196,17 @@ class Gaussian(NoiseDistribution):
Gradient of the negative log-likelihood function at y given link(f), w.r.t variance parameter (noise_variance) Gradient of the negative log-likelihood function at y given link(f), w.r.t variance parameter (noise_variance)
.. math:: .. math::
\\frac{d \\ln p(y_{i}|f_{i})}{d\\sigma^{2}} = \\frac{N}{2\\sigma^{2}} + \\frac{(y_{i} - f_{i})^{2}}{2\\sigma^{4}} \\frac{d \\ln p(y_{i}|\\lambda(f_{i}))}{d\\sigma^{2}} = \\frac{N}{2\\sigma^{2}} + \\frac{(y_{i} - \\lambda(f_{i}))^{2}}{2\\sigma^{4}}
:param link_f: latent variables link(f) :param link_f: latent variables link(f)
:type link_f: Nx1 array :type link_f: Nx1 array
:param y: data :param y: data
:type y: Nx1 array :type y: Nx1 array
:param extra_data: extra_data which is not used in student t distribution - not used :param extra_data: extra_data not used in gaussian
:returns: derivative of log likelihood evaluated at points f w.r.t variance parameter :returns: derivative of log likelihood evaluated at points link(f) w.r.t variance parameter
:rtype: float :rtype: float
""" """
assert link_f.shape == y.shape assert np.asarray(link_f).shape == np.asarray(y).shape
e = y - link_f e = y - link_f
s_4 = 1.0/(self.variance**2) s_4 = 1.0/(self.variance**2)
dlik_dsigma = -0.5*self.N/self.variance + 0.5*s_4*np.dot(e.T, e) dlik_dsigma = -0.5*self.N/self.variance + 0.5*s_4*np.dot(e.T, e)
@ -217,17 +217,17 @@ class Gaussian(NoiseDistribution):
Derivative of the dlogpdf_dlink w.r.t variance parameter (noise_variance) Derivative of the dlogpdf_dlink w.r.t variance parameter (noise_variance)
.. math:: .. math::
\\frac{d}{d\\sigma^{2}}(\\frac{d \\ln p(y_{i}|f_{i})}{df}) = \\frac{1}{\\sigma^{4}}(-y_{i} + f_{i}) \\frac{d}{d\\sigma^{2}}(\\frac{d \\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)}) = \\frac{1}{\\sigma^{4}}(-y_{i} + \\lambda(f_{i}))
:param link_f: latent variables link(f) :param link_f: latent variables link(f)
:type link_f: Nx1 array :type link_f: Nx1 array
:param y: data :param y: data
:type y: Nx1 array :type y: Nx1 array
:param extra_data: extra_data which is not used in student t distribution - not used :param extra_data: extra_data not used in gaussian
:returns: derivative of log likelihood evaluated at points f w.r.t variance parameter :returns: derivative of log likelihood evaluated at points link(f) w.r.t variance parameter
:rtype: Nx1 array :rtype: Nx1 array
""" """
assert link_f.shape == y.shape assert np.asarray(link_f).shape == np.asarray(y).shape
s_4 = 1.0/(self.variance**2) s_4 = 1.0/(self.variance**2)
dlik_grad_dsigma = -np.dot(s_4*self.I, y) + np.dot(s_4*self.I, link_f) dlik_grad_dsigma = -np.dot(s_4*self.I, y) + np.dot(s_4*self.I, link_f)
return dlik_grad_dsigma return dlik_grad_dsigma
@ -237,17 +237,17 @@ class Gaussian(NoiseDistribution):
Gradient of the hessian (d2logpdf_dlink2) w.r.t variance parameter (noise_variance) Gradient of the hessian (d2logpdf_dlink2) w.r.t variance parameter (noise_variance)
.. math:: .. math::
\\frac{d}{d\\sigma^{2}}(\\frac{d^{2} \\ln p(y_{i}|f_{i})}{d^{2}f}) = \\frac{1}{\\sigma^{4}} \\frac{d}{d\\sigma^{2}}(\\frac{d^{2} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{2}\\lambda(f)}) = \\frac{1}{\\sigma^{4}}
:param link_f: latent variables link(f) :param link_f: latent variables link(f)
:type link_f: Nx1 array :type link_f: Nx1 array
:param y: data :param y: data
:type y: Nx1 array :type y: Nx1 array
:param extra_data: extra_data which is not used in student t distribution - not used :param extra_data: extra_data not used in gaussian
:returns: derivative of log hessian evaluated at points f and f_j w.r.t variance parameter :returns: derivative of log hessian evaluated at points link(f_i) and link(f_j) w.r.t variance parameter
:rtype: Nx1 array :rtype: Nx1 array
""" """
assert link_f.shape == y.shape assert np.asarray(link_f).shape == np.asarray(y).shape
s_4 = 1.0/(self.variance**2) s_4 = 1.0/(self.variance**2)
d2logpdf_dlink2_dvar = np.diag(s_4*self.I)[:, None] d2logpdf_dlink2_dvar = np.diag(s_4*self.I)[:, None]
return d2logpdf_dlink2_dvar return d2logpdf_dlink2_dvar

View file

@ -55,7 +55,7 @@ class StudentT(NoiseDistribution):
:returns: likelihood evaluated for this point :returns: likelihood evaluated for this point
:rtype: float :rtype: float
""" """
assert link_f.shape == y.shape assert np.asarray(link_f).shape == np.asarray(y).shape
e = y - link_f e = y - link_f
#Careful gamma(big_number) is infinity! #Careful gamma(big_number) is infinity!
objective = ((np.exp(gammaln((self.v + 1)*0.5) - gammaln(self.v * 0.5)) objective = ((np.exp(gammaln((self.v + 1)*0.5) - gammaln(self.v * 0.5))
@ -80,7 +80,7 @@ class StudentT(NoiseDistribution):
:rtype: float :rtype: float
""" """
assert link_f.shape == y.shape assert np.asarray(link_f).shape == np.asarray(y).shape
e = y - link_f e = y - link_f
objective = (+ gammaln((self.v + 1) * 0.5) objective = (+ gammaln((self.v + 1) * 0.5)
- gammaln(self.v * 0.5) - gammaln(self.v * 0.5)
@ -113,7 +113,7 @@ class StudentT(NoiseDistribution):
def d2logpdf_dlink2(self, link_f, y, extra_data=None): def d2logpdf_dlink2(self, link_f, y, extra_data=None):
""" """
Hessian at y, given link(f), w.r.t link(f) the hessian will be 0 unless i == j Hessian at y, given link(f), w.r.t link(f) the hessian will be 0 unless i == j
i.e. second derivative lik_function at y given f_{i} f_{j} w.r.t f_{i} and f_{j} i.e. second derivative logpdf at y given link(f_i) and link(f_j) w.r.t link(f_i) and link(f_j)
.. math:: .. math::
\\frac{d^{2} \\ln p(y_{i}|f_{i})}{d^{2}f} = \\frac{(v+1)((y_{i}-f_{i})^{2} - \\sigma^{2}v)}{((y_{i}-f_{i})^{2} + \\sigma^{2}v)^{2}} \\frac{d^{2} \\ln p(y_{i}|f_{i})}{d^{2}f} = \\frac{(v+1)((y_{i}-f_{i})^{2} - \\sigma^{2}v)}{((y_{i}-f_{i})^{2} + \\sigma^{2}v)^{2}}
@ -128,7 +128,7 @@ class StudentT(NoiseDistribution):
.. Note:: .. Note::
Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases
(the distribution for y_{i} depends only on f_{i} not on f_{j!=i} (the distribution for y_i depends only on link(f_i) not on link(f_(j!=i))
""" """
assert y.shape == link_f.shape assert y.shape == link_f.shape
e = y - link_f e = y - link_f

View file

@ -5,6 +5,7 @@ from GPy.models import GradientChecker
import functools import functools
import inspect import inspect
from GPy.likelihoods.noise_models import gp_transformations from GPy.likelihoods.noise_models import gp_transformations
from functools import partial
def dparam_partial(inst_func, *args): def dparam_partial(inst_func, *args):
""" """
@ -24,7 +25,7 @@ def dparam_partial(inst_func, *args):
return inst_func(*args) return inst_func(*args)
return functools.partial(param_func, inst_func=inst_func, args=args) return functools.partial(param_func, inst_func=inst_func, args=args)
def dparam_checkgrad(func, dfunc, params, args, constrain_positive=True, randomize=False, verbose=False): def dparam_checkgrad(func, dfunc, params, args, constraints=None, randomize=False, verbose=False):
""" """
checkgrad expects a f: R^N -> R^1 and df: R^N -> R^N checkgrad expects a f: R^N -> R^1 and df: R^N -> R^N
However if we are holding other parameters fixed and moving something else However if we are holding other parameters fixed and moving something else
@ -50,8 +51,10 @@ def dparam_checkgrad(func, dfunc, params, args, constrain_positive=True, randomi
grad = GradientChecker(lambda x: np.atleast_1d(partial_f(x))[f_ind], grad = GradientChecker(lambda x: np.atleast_1d(partial_f(x))[f_ind],
lambda x : np.atleast_1d(partial_df(x))[fixed_val], lambda x : np.atleast_1d(partial_df(x))[fixed_val],
param, 'p') param, 'p')
if constrain_positive: #This is not general for more than one param...
grad.constrain_positive('p') if constraints is not None:
for constraint in constraints:
constraint('p', grad)
if randomize: if randomize:
grad.randomize() grad.randomize()
print grad print grad
@ -77,6 +80,7 @@ class TestNoiseModels(object):
noise = np.random.randn(*self.X[:, 0].shape)*self.real_std noise = np.random.randn(*self.X[:, 0].shape)*self.real_std
self.Y = (np.sin(self.X[:, 0]*2*np.pi) + noise)[:, None] self.Y = (np.sin(self.X[:, 0]*2*np.pi) + noise)[:, None]
self.f = np.random.rand(self.N, 1) self.f = np.random.rand(self.N, 1)
self.binary_Y = np.asarray(np.random.rand(self.N) > 0.5, dtype=np.int)[:, None]
self.var = 0.2 self.var = 0.2
@ -92,6 +96,22 @@ class TestNoiseModels(object):
def test_noise_models(self): def test_noise_models(self):
self.setUp() self.setUp()
####################################################
# Constraint wrappers so we can just list them off #
####################################################
def constrain_negative(regex, model):
model.constrain_negative(regex)
def constrain_positive(regex, model):
model.constrain_positive(regex)
def constrain_bounded(regex, model, lower, upper):
"""
Used like: partial(constrain_bounded, lower=0, upper=1)
"""
model.constrain_bounded(regex, lower, upper)
""" """
Dictionary where we nest models we would like to check Dictionary where we nest models we would like to check
Name: { Name: {
@ -99,9 +119,10 @@ class TestNoiseModels(object):
"grad_params": { "grad_params": {
"names": [names_of_params_we_want, to_grad_check], "names": [names_of_params_we_want, to_grad_check],
"vals": [values_of_params, to_start_at], "vals": [values_of_params, to_start_at],
"constrain_positive": [boolean_values, of_whether_to_constrain] "constrain": [constraint_wrappers, listed_here]
}, },
"laplace": boolean_of_whether_model_should_work_for_laplace "laplace": boolean_of_whether_model_should_work_for_laplace,
"link_f_constraints": [constraint_wrappers, listed_here]
} }
""" """
noise_models = {"Student_t_default": { noise_models = {"Student_t_default": {
@ -109,7 +130,7 @@ class TestNoiseModels(object):
"grad_params": { "grad_params": {
"names": ["t_noise"], "names": ["t_noise"],
"vals": [self.var], "vals": [self.var],
"constrain_positive": [True] "constraints": [constrain_positive]
}, },
"laplace": True "laplace": True
}, },
@ -118,7 +139,7 @@ class TestNoiseModels(object):
"grad_params": { "grad_params": {
"names": ["t_noise"], "names": ["t_noise"],
"vals": [1], "vals": [1],
"constrain_positive": [True] "constraints": [constrain_positive]
}, },
"laplace": True "laplace": True
}, },
@ -127,7 +148,7 @@ class TestNoiseModels(object):
"grad_params": { "grad_params": {
"names": ["t_noise"], "names": ["t_noise"],
"vals": [0.01], "vals": [0.01],
"constrain_positive": [True] "constraints": [constrain_positive]
}, },
"laplace": True "laplace": True
}, },
@ -136,7 +157,7 @@ class TestNoiseModels(object):
"grad_params": { "grad_params": {
"names": ["t_noise"], "names": ["t_noise"],
"vals": [self.var], "vals": [self.var],
"constrain_positive": [True] "constraints": [constrain_positive]
}, },
"laplace": True "laplace": True
}, },
@ -145,7 +166,7 @@ class TestNoiseModels(object):
"grad_params": { "grad_params": {
"names": ["t_noise"], "names": ["t_noise"],
"vals": [self.var], "vals": [self.var],
"constrain_positive": [True] "constraints": [constrain_positive]
}, },
"laplace": True "laplace": True
}, },
@ -154,7 +175,7 @@ class TestNoiseModels(object):
"grad_params": { "grad_params": {
"names": ["noise_model_variance"], "names": ["noise_model_variance"],
"vals": [self.var], "vals": [self.var],
"constrain_positive": [True] "constraints": [constrain_positive]
}, },
"laplace": True "laplace": True
}, },
@ -163,7 +184,7 @@ class TestNoiseModels(object):
"grad_params": { "grad_params": {
"names": ["noise_model_variance"], "names": ["noise_model_variance"],
"vals": [self.var], "vals": [self.var],
"constrain_positive": [True] "constraints": [constrain_positive]
}, },
"laplace": True "laplace": True
}, },
@ -172,7 +193,7 @@ class TestNoiseModels(object):
"grad_params": { "grad_params": {
"names": ["noise_model_variance"], "names": ["noise_model_variance"],
"vals": [self.var], "vals": [self.var],
"constrain_positive": [True] "constraints": [constrain_positive]
}, },
"laplace": True "laplace": True
}, },
@ -181,18 +202,42 @@ class TestNoiseModels(object):
"grad_params": { "grad_params": {
"names": ["noise_model_variance"], "names": ["noise_model_variance"],
"vals": [self.var], "vals": [self.var],
"constrain_positive": [True] "constraints": [constrain_positive]
}, },
"laplace": True "laplace": True
} },
"Bernoulli_default": {
"model": GPy.likelihoods.bernoulli(),
"link_f_constraints": [partial(constrain_bounded, lower=0, upper=1)],
"laplace": True,
"Y": self.binary_Y,
} }
}
for name, attributes in noise_models.iteritems(): for name, attributes in noise_models.iteritems():
model = attributes["model"] model = attributes["model"]
params = attributes["grad_params"] if "grad_params" in attributes:
param_vals = params["vals"] params = attributes["grad_params"]
param_names= params["names"] param_vals = params["vals"]
constrain_positive = params["constrain_positive"] param_names= params["names"]
param_constraints = params["constraints"]
else:
params = []
param_vals = []
param_names = []
constrain_positive = []
if "link_f_constraints" in attributes:
link_f_constraints = attributes["link_f_constraints"]
else:
link_f_constraints = []
if "Y" in attributes:
Y = attributes["Y"].copy()
else:
Y = self.Y.copy()
if "f" in attributes:
f = attributes["f"].copy()
else:
f = self.f.copy()
laplace = attributes["laplace"] laplace = attributes["laplace"]
if len(param_vals) > 1: if len(param_vals) > 1:
@ -200,27 +245,27 @@ class TestNoiseModels(object):
#Required by all #Required by all
#Normal derivatives #Normal derivatives
yield self.t_logpdf, model yield self.t_logpdf, model, Y, f
yield self.t_dlogpdf_df, model yield self.t_dlogpdf_df, model, Y, f
yield self.t_d2logpdf_df2, model yield self.t_d2logpdf_df2, model, Y, f
#Link derivatives #Link derivatives
yield self.t_dlogpdf_dlink, model yield self.t_dlogpdf_dlink, model, Y, f, link_f_constraints
yield self.t_d2logpdf_dlink2, model yield self.t_d2logpdf_dlink2, model, Y, f, link_f_constraints
if laplace: if laplace:
#Laplace only derivatives #Laplace only derivatives
yield self.t_d3logpdf_df3, model yield self.t_d3logpdf_df3, model, Y, f
yield self.t_d3logpdf_dlink3, model yield self.t_d3logpdf_dlink3, model, Y, f, link_f_constraints
#Params #Params
yield self.t_dlogpdf_dparams, model, param_vals yield self.t_dlogpdf_dparams, model, Y, f, param_vals, param_constraints
yield self.t_dlogpdf_df_dparams, model, param_vals yield self.t_dlogpdf_df_dparams, model, Y, f, param_vals, param_constraints
yield self.t_d2logpdf2_df2_dparams, model, param_vals yield self.t_d2logpdf2_df2_dparams, model, Y, f, param_vals, param_constraints
#Link params #Link params
yield self.t_dlogpdf_link_dparams, model, param_vals yield self.t_dlogpdf_link_dparams, model, Y, f, param_vals, param_constraints
yield self.t_dlogpdf_dlink_dparams, model, param_vals yield self.t_dlogpdf_dlink_dparams, model, Y, f, param_vals, param_constraints
yield self.t_d2logpdf2_dlink2_dparams, model, param_vals yield self.t_d2logpdf2_dlink2_dparams, model, Y, f, param_vals, param_constraints
#laplace likelihood gradcheck #laplace likelihood gradcheck
yield self.t_laplace_fit_rbf_white, model, param_vals, param_names, constrain_positive yield self.t_laplace_fit_rbf_white, model, self.X, Y, f, self.step, param_vals, param_names, param_constraints
self.tearDown() self.tearDown()
@ -228,42 +273,42 @@ class TestNoiseModels(object):
# dpdf_df's # # dpdf_df's #
############# #############
@with_setup(setUp, tearDown) @with_setup(setUp, tearDown)
def t_logpdf(self, model): def t_logpdf(self, model, Y, f):
print "\n{}".format(inspect.stack()[0][3]) print "\n{}".format(inspect.stack()[0][3])
print model print model
np.testing.assert_almost_equal( np.testing.assert_almost_equal(
np.log(model.pdf(self.f.copy(), self.Y.copy())), np.log(model.pdf(f.copy(), Y.copy())),
model.logpdf(self.f.copy(), self.Y.copy())) model.logpdf(f.copy(), Y.copy()))
@with_setup(setUp, tearDown) @with_setup(setUp, tearDown)
def t_dlogpdf_df(self, model): def t_dlogpdf_df(self, model, Y, f):
print "\n{}".format(inspect.stack()[0][3]) print "\n{}".format(inspect.stack()[0][3])
self.description = "\n{}".format(inspect.stack()[0][3]) self.description = "\n{}".format(inspect.stack()[0][3])
logpdf = functools.partial(model.logpdf, y=self.Y) logpdf = functools.partial(model.logpdf, y=Y)
dlogpdf_df = functools.partial(model.dlogpdf_df, y=self.Y) dlogpdf_df = functools.partial(model.dlogpdf_df, y=Y)
grad = GradientChecker(logpdf, dlogpdf_df, self.f.copy(), 'g') grad = GradientChecker(logpdf, dlogpdf_df, f.copy(), 'g')
grad.randomize() grad.randomize()
grad.checkgrad(verbose=1) grad.checkgrad(verbose=1)
print model print model
assert grad.checkgrad() assert grad.checkgrad()
@with_setup(setUp, tearDown) @with_setup(setUp, tearDown)
def t_d2logpdf_df2(self, model): def t_d2logpdf_df2(self, model, Y, f):
print "\n{}".format(inspect.stack()[0][3]) print "\n{}".format(inspect.stack()[0][3])
dlogpdf_df = functools.partial(model.dlogpdf_df, y=self.Y) dlogpdf_df = functools.partial(model.dlogpdf_df, y=Y)
d2logpdf_df2 = functools.partial(model.d2logpdf_df2, y=self.Y) d2logpdf_df2 = functools.partial(model.d2logpdf_df2, y=Y)
grad = GradientChecker(dlogpdf_df, d2logpdf_df2, self.f.copy(), 'g') grad = GradientChecker(dlogpdf_df, d2logpdf_df2, f.copy(), 'g')
grad.randomize() grad.randomize()
grad.checkgrad(verbose=1) grad.checkgrad(verbose=1)
print model print model
assert grad.checkgrad() assert grad.checkgrad()
@with_setup(setUp, tearDown) @with_setup(setUp, tearDown)
def t_d3logpdf_df3(self, model): def t_d3logpdf_df3(self, model, Y, f):
print "\n{}".format(inspect.stack()[0][3]) print "\n{}".format(inspect.stack()[0][3])
d2logpdf_df2 = functools.partial(model.d2logpdf_df2, y=self.Y) d2logpdf_df2 = functools.partial(model.d2logpdf_df2, y=Y)
d3logpdf_df3 = functools.partial(model.d3logpdf_df3, y=self.Y) d3logpdf_df3 = functools.partial(model.d3logpdf_df3, y=Y)
grad = GradientChecker(d2logpdf_df2, d3logpdf_df3, self.f.copy(), 'g') grad = GradientChecker(d2logpdf_df2, d3logpdf_df3, f.copy(), 'g')
grad.randomize() grad.randomize()
grad.checkgrad(verbose=1) grad.checkgrad(verbose=1)
print model print model
@ -273,32 +318,32 @@ class TestNoiseModels(object):
# df_dparams # # df_dparams #
############## ##############
@with_setup(setUp, tearDown) @with_setup(setUp, tearDown)
def t_dlogpdf_dparams(self, model, params): def t_dlogpdf_dparams(self, model, Y, f, params, param_constraints):
print "\n{}".format(inspect.stack()[0][3]) print "\n{}".format(inspect.stack()[0][3])
print model print model
assert ( assert (
dparam_checkgrad(model.logpdf, model.dlogpdf_dtheta, dparam_checkgrad(model.logpdf, model.dlogpdf_dtheta,
params, args=(self.f, self.Y), constrain_positive=True, params, args=(f, Y), constraints=param_constraints,
randomize=False, verbose=True) randomize=False, verbose=True)
) )
@with_setup(setUp, tearDown) @with_setup(setUp, tearDown)
def t_dlogpdf_df_dparams(self, model, params): def t_dlogpdf_df_dparams(self, model, Y, f, params, param_constraints):
print "\n{}".format(inspect.stack()[0][3]) print "\n{}".format(inspect.stack()[0][3])
print model print model
assert ( assert (
dparam_checkgrad(model.dlogpdf_df, model.dlogpdf_df_dtheta, dparam_checkgrad(model.dlogpdf_df, model.dlogpdf_df_dtheta,
params, args=(self.f, self.Y), constrain_positive=True, params, args=(f, Y), constraints=param_constraints,
randomize=False, verbose=True) randomize=False, verbose=True)
) )
@with_setup(setUp, tearDown) @with_setup(setUp, tearDown)
def t_d2logpdf2_df2_dparams(self, model, params): def t_d2logpdf2_df2_dparams(self, model, Y, f, params, param_constraints):
print "\n{}".format(inspect.stack()[0][3]) print "\n{}".format(inspect.stack()[0][3])
print model print model
assert ( assert (
dparam_checkgrad(model.d2logpdf_df2, model.d2logpdf_df2_dtheta, dparam_checkgrad(model.d2logpdf_df2, model.d2logpdf_df2_dtheta,
params, args=(self.f, self.Y), constrain_positive=True, params, args=(f, Y), constraints=param_constraints,
randomize=False, verbose=True) randomize=False, verbose=True)
) )
@ -306,33 +351,48 @@ class TestNoiseModels(object):
# dpdf_dlink's # # dpdf_dlink's #
################ ################
@with_setup(setUp, tearDown) @with_setup(setUp, tearDown)
def t_dlogpdf_dlink(self, model): def t_dlogpdf_dlink(self, model, Y, f, link_f_constraints):
print "\n{}".format(inspect.stack()[0][3]) print "\n{}".format(inspect.stack()[0][3])
logpdf = functools.partial(model.logpdf_link, y=self.Y) logpdf = functools.partial(model.logpdf_link, y=Y)
dlogpdf_dlink = functools.partial(model.dlogpdf_dlink, y=self.Y) dlogpdf_dlink = functools.partial(model.dlogpdf_dlink, y=Y)
grad = GradientChecker(logpdf, dlogpdf_dlink, self.f.copy(), 'g') grad = GradientChecker(logpdf, dlogpdf_dlink, f.copy(), 'g')
#Apply constraints to link_f values
for constraint in link_f_constraints:
constraint('g', grad)
grad.randomize()
print grad
grad.checkgrad(verbose=1)
assert grad.checkgrad()
@with_setup(setUp, tearDown)
def t_d2logpdf_dlink2(self, model, Y, f, link_f_constraints):
print "\n{}".format(inspect.stack()[0][3])
dlogpdf_dlink = functools.partial(model.dlogpdf_dlink, y=Y)
d2logpdf_dlink2 = functools.partial(model.d2logpdf_dlink2, y=Y)
grad = GradientChecker(dlogpdf_dlink, d2logpdf_dlink2, f.copy(), 'g')
#Apply constraints to link_f values
for constraint in link_f_constraints:
constraint('g', grad)
grad.randomize() grad.randomize()
grad.checkgrad(verbose=1) grad.checkgrad(verbose=1)
print grad print grad
assert grad.checkgrad() assert grad.checkgrad()
@with_setup(setUp, tearDown) @with_setup(setUp, tearDown)
def t_d2logpdf_dlink2(self, model): def t_d3logpdf_dlink3(self, model, Y, f, link_f_constraints):
print "\n{}".format(inspect.stack()[0][3]) print "\n{}".format(inspect.stack()[0][3])
dlogpdf_dlink = functools.partial(model.dlogpdf_dlink, y=self.Y) d2logpdf_dlink2 = functools.partial(model.d2logpdf_dlink2, y=Y)
d2logpdf_dlink2 = functools.partial(model.d2logpdf_dlink2, y=self.Y) d3logpdf_dlink3 = functools.partial(model.d3logpdf_dlink3, y=Y)
grad = GradientChecker(dlogpdf_dlink, d2logpdf_dlink2, self.f.copy(), 'g') grad = GradientChecker(d2logpdf_dlink2, d3logpdf_dlink3, f.copy(), 'g')
grad.randomize()
grad.checkgrad(verbose=1) #Apply constraints to link_f values
print grad for constraint in link_f_constraints:
assert grad.checkgrad() constraint('g', grad)
@with_setup(setUp, tearDown)
def t_d3logpdf_dlink3(self, model):
print "\n{}".format(inspect.stack()[0][3])
d2logpdf_dlink2 = functools.partial(model.d2logpdf_dlink2, y=self.Y)
d3logpdf_dlink3 = functools.partial(model.d3logpdf_dlink3, y=self.Y)
grad = GradientChecker(d2logpdf_dlink2, d3logpdf_dlink3, self.f.copy(), 'g')
grad.randomize() grad.randomize()
grad.checkgrad(verbose=1) grad.checkgrad(verbose=1)
print grad print grad
@ -342,32 +402,32 @@ class TestNoiseModels(object):
# dlink_dparams # # dlink_dparams #
################# #################
@with_setup(setUp, tearDown) @with_setup(setUp, tearDown)
def t_dlogpdf_link_dparams(self, model, params): def t_dlogpdf_link_dparams(self, model, Y, f, params, param_constraints):
print "\n{}".format(inspect.stack()[0][3]) print "\n{}".format(inspect.stack()[0][3])
print model print model
assert ( assert (
dparam_checkgrad(model.logpdf_link, model.dlogpdf_link_dtheta, dparam_checkgrad(model.logpdf_link, model.dlogpdf_link_dtheta,
params, args=(self.f, self.Y), constrain_positive=True, params, args=(f, Y), constraints=param_constraints,
randomize=False, verbose=True) randomize=False, verbose=True)
) )
@with_setup(setUp, tearDown) @with_setup(setUp, tearDown)
def t_dlogpdf_dlink_dparams(self, model, params): def t_dlogpdf_dlink_dparams(self, model, Y, f, params, param_constraints):
print "\n{}".format(inspect.stack()[0][3]) print "\n{}".format(inspect.stack()[0][3])
print model print model
assert ( assert (
dparam_checkgrad(model.dlogpdf_dlink, model.dlogpdf_dlink_dtheta, dparam_checkgrad(model.dlogpdf_dlink, model.dlogpdf_dlink_dtheta,
params, args=(self.f, self.Y), constrain_positive=True, params, args=(f, Y), constraints=param_constraints,
randomize=False, verbose=True) randomize=False, verbose=True)
) )
@with_setup(setUp, tearDown) @with_setup(setUp, tearDown)
def t_d2logpdf2_dlink2_dparams(self, model, params): def t_d2logpdf2_dlink2_dparams(self, model, Y, f, params, param_constraints):
print "\n{}".format(inspect.stack()[0][3]) print "\n{}".format(inspect.stack()[0][3])
print model print model
assert ( assert (
dparam_checkgrad(model.d2logpdf_dlink2, model.d2logpdf_dlink2_dtheta, dparam_checkgrad(model.d2logpdf_dlink2, model.d2logpdf_dlink2_dtheta,
params, args=(self.f, self.Y), constrain_positive=True, params, args=(f, Y), constraints=param_constraints,
randomize=False, verbose=True) randomize=False, verbose=True)
) )
@ -375,26 +435,26 @@ class TestNoiseModels(object):
# laplace test # # laplace test #
################ ################
@with_setup(setUp, tearDown) @with_setup(setUp, tearDown)
def t_laplace_fit_rbf_white(self, model, param_vals, param_names, constrain_positive): def t_laplace_fit_rbf_white(self, model, X, Y, f, step, param_vals, param_names, constraints):
print "\n{}".format(inspect.stack()[0][3]) print "\n{}".format(inspect.stack()[0][3])
self.Y = self.Y/self.Y.max() #Normalize
Y = Y/Y.max()
white_var = 0.001 white_var = 0.001
kernel = GPy.kern.rbf(self.X.shape[1]) + GPy.kern.white(self.X.shape[1]) kernel = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1])
laplace_likelihood = GPy.likelihoods.Laplace(self.Y.copy(), model) laplace_likelihood = GPy.likelihoods.Laplace(Y.copy(), model)
m = GPy.models.GPRegression(self.X, self.Y.copy(), kernel, likelihood=laplace_likelihood) m = GPy.models.GPRegression(X.copy(), Y.copy(), kernel, likelihood=laplace_likelihood)
m.ensure_default_constraints() m.ensure_default_constraints()
m.constrain_fixed('white', white_var) m.constrain_fixed('white', white_var)
for param_num in range(len(param_names)): for param_num in range(len(param_names)):
name = param_names[param_num] name = param_names[param_num]
if constrain_positive[param_num]:
m.constrain_positive(name)
m[name] = param_vals[param_num] m[name] = param_vals[param_num]
constraints[param_num](name, m)
m.randomize() m.randomize()
m.checkgrad(verbose=1, step=self.step) m.checkgrad(verbose=1, step=step)
print m print m
assert m.checkgrad(step=self.step) assert m.checkgrad(step=step)
class LaplaceTests(unittest.TestCase): class LaplaceTests(unittest.TestCase):