Started on chaining, must remember to chain _laplace_gradients aswell!

This commit is contained in:
Alan Saul 2013-10-15 12:25:19 +01:00
parent a0aac76812
commit 96f189113a
4 changed files with 325 additions and 235 deletions

View file

@ -76,7 +76,7 @@ class Laplace(likelihood):
return self.noise_model._set_params(p) return self.noise_model._set_params(p)
def _shared_gradients_components(self): def _shared_gradients_components(self):
d3lik_d3fhat = -self.noise_model._d3nlog_mass_dgp3(self.f_hat, self.data, extra_data=self.extra_data) d3lik_d3fhat = self.noise_model.d3logpdf_df3(self.f_hat, self.data, extra_data=self.extra_data)
dL_dfhat = 0.5*(np.diag(self.Ki_W_i)[:, None]*d3lik_d3fhat).T #why isn't this -0.5? dL_dfhat = 0.5*(np.diag(self.Ki_W_i)[:, None]*d3lik_d3fhat).T #why isn't this -0.5?
I_KW_i = np.eye(self.N) - np.dot(self.K, self.Wi_K_i) I_KW_i = np.eye(self.N) - np.dot(self.K, self.Wi_K_i)
return dL_dfhat, I_KW_i return dL_dfhat, I_KW_i
@ -89,7 +89,7 @@ class Laplace(likelihood):
:rtype: Matrix (1 x num_kernel_params) :rtype: Matrix (1 x num_kernel_params)
""" """
dL_dfhat, I_KW_i = self._shared_gradients_components() dL_dfhat, I_KW_i = self._shared_gradients_components()
dlp = -self.noise_model._dnlog_mass_dgp(self.data, self.f_hat) dlp = self.noise_model.dlogpdf_df(self.f_hat, self.data)
#Explicit #Explicit
#expl_a = np.dot(self.Ki_f, self.Ki_f.T) #expl_a = np.dot(self.Ki_f, self.Ki_f.T)
@ -178,7 +178,7 @@ class Laplace(likelihood):
self.Wi_K_i = self.W12BiW12 self.Wi_K_i = self.W12BiW12
self.ln_det_Wi_K = pddet(self.Sigma_tilde + self.K) self.ln_det_Wi_K = pddet(self.Sigma_tilde + self.K)
self.lik = -self.noise_model._nlog_mass(self.f_hat, self.data, extra_data=self.extra_data) self.lik = self.noise_model.logpdf(self.f_hat, self.data, extra_data=self.extra_data)
self.y_Wi_Ki_i_y = mdot(Y_tilde.T, self.Wi_K_i, Y_tilde) self.y_Wi_Ki_i_y = mdot(Y_tilde.T, self.Wi_K_i, Y_tilde)
Z_tilde = (+ self.lik Z_tilde = (+ self.lik
@ -223,7 +223,7 @@ class Laplace(likelihood):
Compute the variables required to compute gaussian Y variables Compute the variables required to compute gaussian Y variables
""" """
#At this point get the hessian matrix (or vector as W is diagonal) #At this point get the hessian matrix (or vector as W is diagonal)
self.W = -self.noise_model.d2lik_d2f(self.data, self.f_hat, extra_data=self.extra_data) self.W = -self.noise_model.d2logpdf_df2(self.f_hat, self.data, extra_data=self.extra_data)
#TODO: Could save on computation when using rasm by returning these, means it isn't just a "mode finder" though #TODO: Could save on computation when using rasm by returning these, means it isn't just a "mode finder" though
self.W12BiW12, self.ln_B_det = self._compute_B_statistics(self.K, self.W, np.eye(self.N)) self.W12BiW12, self.ln_B_det = self._compute_B_statistics(self.K, self.W, np.eye(self.N))
@ -290,7 +290,7 @@ class Laplace(likelihood):
old_obj = np.inf old_obj = np.inf
def obj(Ki_f, f): def obj(Ki_f, f):
return -0.5*np.dot(Ki_f.T, f) - self.noise_model._nlog_mass(f, self.data, extra_data=self.extra_data) return -0.5*np.dot(Ki_f.T, f) + self.noise_model.logpdf(f, self.data, extra_data=self.extra_data)
difference = np.inf difference = np.inf
epsilon = 1e-6 epsilon = 1e-6
@ -299,10 +299,10 @@ class Laplace(likelihood):
i = 0 i = 0
while difference > epsilon and i < MAX_ITER: while difference > epsilon and i < MAX_ITER:
W = -self.noise_model.d2lik_d2f(self.data, f, extra_data=self.extra_data) W = -self.noise_model.d2logpdf_df2(f, self.data, extra_data=self.extra_data)
W_f = W*f W_f = W*f
grad = -self.noise_model._dnlog_mass_dgp(f, self.data, extra_data=self.extra_data) grad = self.noise_model.dlogpdf_df(f, self.data, extra_data=self.extra_data)
b = W_f + grad b = W_f + grad
W12BiW12Kb, _ = self._compute_B_statistics(K, W.copy(), np.dot(K, b)) W12BiW12Kb, _ = self._compute_B_statistics(K, W.copy(), np.dot(K, b))

View file

@ -80,63 +80,82 @@ class Gaussian(NoiseDistribution):
def _predictive_variance_analytical(self,mu,sigma,predictive_mean=None): def _predictive_variance_analytical(self,mu,sigma,predictive_mean=None):
return 1./(1./self.variance + 1./sigma**2) return 1./(1./self.variance + 1./sigma**2)
def _mass(self, gp, obs): def _mass(self, link_f, y):
#FIXME: Careful now passing link_f in not gp (f)!
#return std_norm_pdf( (self.gp_link.transf(gp)-obs)/np.sqrt(self.variance) ) #return std_norm_pdf( (self.gp_link.transf(gp)-obs)/np.sqrt(self.variance) )
#Assumes no covariance, exp, sum, log for numerical stability #Assumes no covariance, exp, sum, log for numerical stability
return np.exp(np.sum(np.log(stats.norm.pdf(obs,self.gp_link.transf(gp),np.sqrt(self.variance))))) #return np.exp(np.sum(np.log(stats.norm.pdf(obs,self.gp_link.transf(gp),np.sqrt(self.variance)))))
#return np.exp(np.sum(np.log(stats.norm.pdf(y, link_f, np.sqrt(self.variance)))))
return np.exp(np.sum(np.log(stats.norm.pdf(y, link_f, np.sqrt(self.variance)))))
def _nlog_mass(self, gp, obs, extra_data=None): def _nlog_mass(self, link_f, y, extra_data=None):
NotImplementedError("Deprecated, now doing chain in likelihood.py for link function evaluation\
Please negate your function and use logpdf in noise_model.py, if implementing a likelihood\
rederivate the derivative without doing the chain and put in logpdf, dlogpdf_dlink or\
its derivatives")
def _dnlog_mass_dgp(self, link_f, y, extra_data=None):
NotImplementedError("Deprecated, now doing chain in likelihood.py for link function evaluation\
Please negate your function and use logpdf in noise_model.py, if implementing a likelihood\
rederivate the derivative without doing the chain and put in logpdf, dlogpdf_dlink or\
its derivatives")
def _d2nlog_mass_dgp2(self, link_f, y, extra_data=None):
NotImplementedError("Deprecated, now doing chain in likelihood.py for link function evaluation\
Please negate your function and use logpdf in noise_model.py, if implementing a likelihood\
rederivate the derivative without doing the chain and put in logpdf, dlogpdf_dlink or\
its derivatives")
def logpdf(self, link_f, y, extra_data=None):
""" """
Negative Log likelihood function Log likelihood function
Chained with link function deriative
.. math:: .. math::
\\-ln p(y_{i}|\\lambda(f_{i})) = +\\frac{D \\ln 2\\pi}{2} + \\frac{\\ln |K|}{2} + \\frac{(y_{i} - \\lambda(f_{i}))^{T}\\sigma^{-2}(y_{i} - \\lambda(f_{i}))}{2} \\ln p(y_{i}|\\lambda(f_{i})) = -\\frac{N \\ln 2\\pi}{2} - \\frac{\\ln |K|}{2} - \\frac{(y_{i} - \\lambda(f_{i}))^{T}\\sigma^{-2}(y_{i} - \\lambda(f_{i}))}{2}
:param gp: latent variables (f) :param link_f: latent variables link(f)
:type gp: Nx1 array :type link_f: Nx1 array
:param obs: data (y) :param y: data
:type obs: Nx1 array :type y: Nx1 array
:param extra_data: extra_data which is not used in student t distribution - not used :param extra_data: extra_data which is not used in student t distribution - not used
:returns: likelihood evaluated for this point :returns: likelihood evaluated for this point
:rtype: float :rtype: float
""" """
assert gp.shape == obs.shape assert link_f.shape == y.shape
return .5*(np.sum((self.gp_link.transf(gp)-obs)**2/self.variance) + self.ln_det_K + self.N*np.log(2.*np.pi)) return -0.5*(np.sum((y-link_f)**2/self.variance) + self.ln_det_K + self.N*np.log(2.*np.pi))
def _dnlog_mass_dgp(self, gp, obs, extra_data=None): def dlogpdf_dlink(self, link_f, y, extra_data=None):
""" """
Negative Gradient of the link function at y, given f w.r.t f Gradient of the pdf at y, given link(f) w.r.t link(f)
Chained with link function deriative
.. math:: .. math::
\\frac{d \\ln p(y_{i}|f_{i})}{df} = \\frac{1}{\\sigma^{2}}(y_{i} - f_{i}) \\frac{d \\ln p(y_{i}|f_{i})}{df} = \\frac{1}{\\sigma^{2}}(y_{i} - f_{i})
\\frac{d \\-ln p(y_{i}|f_{i})}{df} = -\\frac{1}{\\sigma^{2}}(y_{i} - \\lambda(f_{i}))\\frac{d\\lambda(f_{i})}{df_{i}}
:param gp: latent variables (f) :param link_f: latent variables link(f)
:type gp: Nx1 array :type link_f: Nx1 array
:param obs: data (y) :param y: data
:type obs: Nx1 array :type y: Nx1 array
:param extra_data: extra_data which is not used in student t distribution - not used :param extra_data: extra_data which is not used in student t distribution - not used
:returns: gradient of negative likelihood evaluated at points :returns: gradient of negative likelihood evaluated at points
:rtype: Nx1 array :rtype: Nx1 array
""" """
assert gp.shape == obs.shape assert link_f.shape == y.shape
return (self.gp_link.transf(gp)-obs)/self.variance * self.gp_link.dtransf_df(gp) s2_i = (1.0/self.variance)
grad = s2_i*y - s2_i*link_f
return grad
def _d2nlog_mass_dgp2(self, gp, obs, extra_data=None): def d2logpdf_dlink2(self, link_f, y, extra_data=None):
""" """
Negative Hessian at y, given f, w.r.t f the hessian will be 0 unless i == j Hessian at y, given link_f, w.r.t link_f the hessian will be 0 unless i == j
i.e. second derivative _nlog_mass at y given f_{i} f_{j} w.r.t f_{i} and f_{j} i.e. second derivative _nlog_mass at y given f_{i} f_{j} w.r.t f_{i} and f_{j}
Chained with link function deriative
.. math:: .. math::
\\frac{d^{2} \\ln p(y_{i}|f_{i})}{d^{2}f} = -\\frac{1}{\\sigma^{2}} \\frac{d^{2} \\ln p(y_{i}|f_{i})}{d^{2}f} = -\\frac{1}{\\sigma^{2}}
:param gp: latent variables (f) :param link_f: latent variables link(f)
:type gp: Nx1 array :type link_f: Nx1 array
:param obs: data (y) :param y: data
:type obs: Nx1 array :type y: Nx1 array
:param extra_data: extra_data which is not used in student t distribution - not used :param extra_data: extra_data which is not used in student t distribution - not used
:returns: Diagonal of hessian matrix (second derivative of likelihood evaluated at points f) :returns: Diagonal of hessian matrix (second derivative of likelihood evaluated at points f)
:rtype: Nx1 array :rtype: Nx1 array
@ -145,91 +164,89 @@ class Gaussian(NoiseDistribution):
Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases
(the distribution for y_{i} depends only on f_{i} not on f_{j!=i} (the distribution for y_{i} depends only on f_{i} not on f_{j!=i}
""" """
assert gp.shape == obs.shape assert link_f.shape == y.shape
#FIXME: Why squared? hess = -(1.0/self.variance)*np.ones((self.N, 1))
return ((self.gp_link.transf(gp)-obs)*self.gp_link.d2transf_df2(gp) + self.gp_link.dtransf_df(gp)**2)/self.variance return hess
def _d3nlog_mass_dgp3(self, gp, obs, extra_data=None): def d3logpdf_dlink3(self, link_f, y, extra_data=None):
""" """
Third order derivative log-likelihood function at y given f w.r.t f Third order derivative log-likelihood function at y given link(f) w.r.t link(f)
Chained with link function deriative
.. math:: .. math::
\\frac{d^{3} \\ln p(y_{i}|f_{i})}{d^{3}f} = 0 \\frac{d^{3} \\ln p(y_{i}|f_{i})}{d^{3}f} = 0
:param gp: latent variables (f) :param link_f: latent variables link(f)
:type gp: Nx1 array :type link_f: Nx1 array
:param obs: data (y) :param y: data
:type obs: Nx1 array :type y: Nx1 array
:param extra_data: extra_data which is not used in student t distribution - not used :param extra_data: extra_data which is not used in student t distribution - not used
:returns: third derivative of likelihood evaluated at points f :returns: third derivative of likelihood evaluated at points f
:rtype: Nx1 array :rtype: Nx1 array
""" """
assert gp.shape == obs.shape assert link_f.shape == y.shape
d2lambda_df2 = self.gp_link.d2transf_df2(gp) d3logpdf_dlink3 = np.diagonal(0*self.I)[:, None] # FIXME: CAREFUL THIS MAY NOT WORK WITH MULTIDIMENSIONS?
return ((self.gp_link.transf(gp)-obs)*self.gp_link.d3transf_df3(gp) - self.gp_link.dtransf_df(gp)*d2lambda_df2 + d2lambda_df2)/self.variance return d3logpdf_dlink3
def _dnlog_mass_dvar(self, gp, obs, extra_data=None): def dlogpdf_dvar(self, link_f, y, extra_data=None):
""" """
Gradient of the negative log-likelihood function at y given f, w.r.t variance parameter (noise_variance) Gradient of the negative log-likelihood function at y given link(f), w.r.t variance parameter (noise_variance)
.. math:: .. math::
\\frac{d \\ln p(y_{i}|f_{i})}{d\\sigma^{2}} = \\frac{N}{2\\sigma^{2}} + \\frac{(y_{i} - f_{i})^{2}}{2\\sigma^{4}} \\frac{d \\ln p(y_{i}|f_{i})}{d\\sigma^{2}} = \\frac{N}{2\\sigma^{2}} + \\frac{(y_{i} - f_{i})^{2}}{2\\sigma^{4}}
:param gp: latent variables (f) :param link_f: latent variables link(f)
:type gp: Nx1 array :type link_f: Nx1 array
:param obs: data (y) :param y: data
:type obs: Nx1 array :type y: Nx1 array
:param extra_data: extra_data which is not used in student t distribution - not used :param extra_data: extra_data which is not used in student t distribution - not used
:returns: derivative of likelihood evaluated at points f w.r.t variance parameter :returns: derivative of likelihood evaluated at points f w.r.t variance parameter
:rtype: float :rtype: float
""" """
assert gp.shape == obs.shape assert link_f.shape == y.shape
e = (obs - self.gp_link.transf(gp)) e = y - link_f
s_4 = 1.0/(self.variance**2) s_4 = 1.0/(self.variance**2)
dnlik_dsigma = 0.5*self.N/self.variance - 0.5*s_4*np.dot(e.T, e) dlik_dsigma = -0.5*self.N/self.variance + 0.5*s_4*np.dot(e.T, e)
return np.sum(dnlik_dsigma) # Sure about this sum? return np.sum(dlik_dsigma) # Sure about this sum?
def _dnlog_mass_dgp_dvar(self, gp, obs, extra_data=None): def dlogpdf_dlink_dvar(self, link_f, y, extra_data=None):
""" """
Derivative of the dlik_df w.r.t variance parameter (noise_variance) Derivative of the dlogpdf_dlink w.r.t variance parameter (noise_variance)
.. math:: .. math::
\\frac{d}{d\\sigma^{2}}(\\frac{d \\ln p(y_{i}|f_{i})}{df}) = \\frac{1}{\\sigma^{4}}(-y_{i} + f_{i}) \\frac{d}{d\\sigma^{2}}(\\frac{d \\ln p(y_{i}|f_{i})}{df}) = \\frac{1}{\\sigma^{4}}(-y_{i} + f_{i})
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data :param y: data
:type y: Nx1 array :type y: Nx1 array
:param f: latent variables f
:type f: Nx1 array
:param extra_data: extra_data which is not used in student t distribution - not used :param extra_data: extra_data which is not used in student t distribution - not used
:returns: derivative of likelihood evaluated at points f w.r.t variance parameter :returns: derivative of likelihood evaluated at points f w.r.t variance parameter
:rtype: Nx1 array :rtype: Nx1 array
""" """
assert gp.shape == obs.shape assert link_f.shape == y.shape
s_4 = 1.0/(self.variance**2) s_4 = 1.0/(self.variance**2)
dnlik_grad_dsigma = s_4*(obs - self.gp_link.transf(gp))*self.gp_link.dtransf_df(gp) dlik_grad_dsigma = -np.dot(s_4*self.I, y) + np.dot(s_4*self.I, link_f)
return dnlik_grad_dsigma return dlik_grad_dsigma
def _d2nlog_mass_dgp2_dvar(self, gp, obs, extra_data=None): def d2logpdf_dlink2_dvar(self, link_f, y, extra_data=None):
""" """
Gradient of the hessian (d2lik_d2f) w.r.t variance parameter (noise_variance) Gradient of the hessian (d2logpdf_dlink2) w.r.t variance parameter (noise_variance)
.. math:: .. math::
\\frac{d}{d\\sigma^{2}}(\\frac{d^{2} \\ln p(y_{i}|f_{i})}{d^{2}f}) = \\frac{1}{\\sigma^{4}} \\frac{d}{d\\sigma^{2}}(\\frac{d^{2} \\ln p(y_{i}|f_{i})}{d^{2}f}) = \\frac{1}{\\sigma^{4}}
:param gp: latent variables (f) :param link_f: latent variables link(f)
:type gp: Nx1 array :type link_f: Nx1 array
:param obs: data (y) :param y: data
:type obs: Nx1 array :type y: Nx1 array
:param extra_data: extra_data which is not used in student t distribution - not used :param extra_data: extra_data which is not used in student t distribution - not used
:returns: derivative of hessian evaluated at points f and f_j w.r.t variance parameter :returns: derivative of hessian evaluated at points f and f_j w.r.t variance parameter
:rtype: Nx1 array :rtype: Nx1 array
""" """
assert gp.shape == obs.shape assert link_f.shape == y.shape
s_4 = 1.0/(self.variance**2) s_4 = 1.0/(self.variance**2)
#FIXME: Why squared? d2logpdf_dlink2_dvar = np.diag(s_4*self.I)[:, None]
dnlik_hess_dvar = -s_4*((self.gp_link.transf(gp)-obs)*self.gp_link.d2transf_df2(gp) + self.gp_link.dtransf_df(gp)**2) return d2logpdf_dlink2_dvar
return dnlik_hess_dvar
def _mean(self,gp): def _mean(self,gp):
""" """

View file

@ -40,64 +40,82 @@ class StudentT(NoiseDistribution):
def variance(self, extra_data=None): def variance(self, extra_data=None):
return (self.v / float(self.v - 2)) * self.sigma2 return (self.v / float(self.v - 2)) * self.sigma2
def _nlog_mass(self, gp, obs, extra_data=None): def _nlog_mass(self, link_f, y, extra_data=None):
NotImplementedError("Deprecated, now doing chain in likelihood.py for link function evaluation\
Please negate your function and use logpdf in noise_model.py, if implementing a likelihood\
rederivate the derivative without doing the chain and put in logpdf, dlogpdf_dlink or\
its derivatives")
def _dnlog_mass_dgp(self, link_f, y, extra_data=None):
NotImplementedError("Deprecated, now doing chain in likelihood.py for link function evaluation\
Please negate your function and use logpdf in noise_model.py, if implementing a likelihood\
rederivate the derivative without doing the chain and put in logpdf, dlogpdf_dlink or\
its derivatives")
def _d2nlog_mass_dgp2(self, link_f, y, extra_data=None):
NotImplementedError("Deprecated, now doing chain in likelihood.py for link function evaluation\
Please negate your function and use logpdf in noise_model.py, if implementing a likelihood\
rederivate the derivative without doing the chain and put in logpdf, dlogpdf_dlink or\
its derivatives")
def logpdf(self, link_f, y, extra_data=None):
""" """
Log Likelihood Function Log Likelihood Function
.. math:: .. math::
\\ln p(y_{i}|f_{i}) = \\ln \\Gamma(\\frac{v+1}{2}) - \\ln \\Gamma(\\frac{v}{2})\\sqrt{v \\pi}\sigma - \\frac{v+1}{2}\\ln (1 + \\frac{1}{v}\\left(\\frac{y_{i} - f_{i}}{\\sigma}\\right)^2 \\ln p(y_{i}|f_{i}) = \\ln \\Gamma(\\frac{v+1}{2}) - \\ln \\Gamma(\\frac{v}{2})\\sqrt{v \\pi}\sigma - \\frac{v+1}{2}\\ln (1 + \\frac{1}{v}\\left(\\frac{y_{i} - f_{i}}{\\sigma}\\right)^2
:param gp: latent variables (f) :param link_f: latent variables (link(f))
:type gp: Nx1 array :type link_f: Nx1 array
:param obs: data (y) :param y: data
:type obs: Nx1 array :type y: Nx1 array
:param extra_data: extra_data which is not used in student t distribution - not used :param extra_data: extra_data which is not used in student t distribution - not used
:returns: likelihood evaluated for this point :returns: likelihood evaluated for this point
:rtype: float :rtype: float
""" """
assert gp.shape == obs.shape assert link_f.shape == y.shape
e = obs - self.gp_link.transf(gp) e = y - link_f
objective = (+ gammaln((self.v + 1) * 0.5) objective = (+ gammaln((self.v + 1) * 0.5)
- gammaln(self.v * 0.5) - gammaln(self.v * 0.5)
- 0.5*np.log(self.sigma2 * self.v * np.pi) - 0.5*np.log(self.sigma2 * self.v * np.pi)
- 0.5*(self.v + 1)*np.log(1 + (1/np.float(self.v))*((e**2)/self.sigma2)) - 0.5*(self.v + 1)*np.log(1 + (1/np.float(self.v))*((e**2)/self.sigma2))
) )
return -np.sum(objective) return np.sum(objective)
def dlik_df(self, y, f, extra_data=None): def dlogpdf_dlink(self, link_f, y, extra_data=None):
""" """
Gradient of the log likelihood function at y, given f w.r.t f Gradient of the log likelihood function at y, given link(f) w.r.t link(f)
.. math:: .. math::
\\frac{d \\ln p(y_{i}|f_{i})}{df} = \\frac{(v+1)(y_{i}-f_{i})}{(y_{i}-f_{i})^{2} + \\sigma^{2}v} \\frac{d \\ln p(y_{i}|f_{i})}{df} = \\frac{(v+1)(y_{i}-f_{i})}{(y_{i}-f_{i})^{2} + \\sigma^{2}v}
:param link_f: latent variables (f)
:type link_f: Nx1 array
:param y: data :param y: data
:type y: Nx1 array :type y: Nx1 array
:param f: latent variables f
:type f: Nx1 array
:param extra_data: extra_data which is not used in student t distribution - not used :param extra_data: extra_data which is not used in student t distribution - not used
:returns: gradient of likelihood evaluated at points :returns: gradient of likelihood evaluated at points
:rtype: Nx1 array :rtype: Nx1 array
""" """
assert y.shape == f.shape assert y.shape == link_f.shape
e = y - f e = y - link_f
grad = ((self.v + 1) * e) / (self.v * self.sigma2 + (e**2)) grad = ((self.v + 1) * e) / (self.v * self.sigma2 + (e**2))
return grad return grad
def d2lik_d2f(self, y, f, extra_data=None): def d2logpdf_dlink2(self, link_f, y, extra_data=None):
""" """
Hessian at y, given f, w.r.t f the hessian will be 0 unless i == j Hessian at y, given link(f), w.r.t link(f) the hessian will be 0 unless i == j
i.e. second derivative lik_function at y given f_{i} f_{j} w.r.t f_{i} and f_{j} i.e. second derivative lik_function at y given f_{i} f_{j} w.r.t f_{i} and f_{j}
.. math:: .. math::
\\frac{d^{2} \\ln p(y_{i}|f_{i})}{d^{2}f} = \\frac{(v+1)((y_{i}-f_{i})^{2} - \\sigma^{2}v)}{((y_{i}-f_{i})^{2} + \\sigma^{2}v)^{2}} \\frac{d^{2} \\ln p(y_{i}|f_{i})}{d^{2}f} = \\frac{(v+1)((y_{i}-f_{i})^{2} - \\sigma^{2}v)}{((y_{i}-f_{i})^{2} + \\sigma^{2}v)^{2}}
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data :param y: data
:type y: Nx1 array :type y: Nx1 array
:param f: latent variables f
:type f: Nx1 array
:param extra_data: extra_data which is not used in student t distribution - not used :param extra_data: extra_data which is not used in student t distribution - not used
:returns: Diagonal of hessian matrix (second derivative of likelihood evaluated at points f) :returns: Diagonal of hessian matrix (second derivative of likelihood evaluated at points f)
:rtype: Nx1 array :rtype: Nx1 array
@ -106,101 +124,101 @@ class StudentT(NoiseDistribution):
Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases
(the distribution for y_{i} depends only on f_{i} not on f_{j!=i} (the distribution for y_{i} depends only on f_{i} not on f_{j!=i}
""" """
assert y.shape == f.shape assert y.shape == link_f.shape
e = y - f e = y - link_f
hess = ((self.v + 1)*(e**2 - self.v*self.sigma2)) / ((self.sigma2*self.v + e**2)**2) hess = ((self.v + 1)*(e**2 - self.v*self.sigma2)) / ((self.sigma2*self.v + e**2)**2)
return hess return hess
def d3lik_d3f(self, y, f, extra_data=None): def d3logpdf_dlink3(self, link_f, y, extra_data=None):
""" """
Third order derivative log-likelihood function at y given f w.r.t f Third order derivative log-likelihood function at y given f w.r.t f
.. math:: .. math::
\\frac{d^{3} \\ln p(y_{i}|f_{i})}{d^{3}f} = \\frac{-2(v+1)((y_{i} - f_{i})^3 - 3(y_{i} - f_{i}) \\sigma^{2} v))}{((y_{i} - f_{i}) + \\sigma^{2} v)^3} \\frac{d^{3} \\ln p(y_{i}|f_{i})}{d^{3}f} = \\frac{-2(v+1)((y_{i} - f_{i})^3 - 3(y_{i} - f_{i}) \\sigma^{2} v))}{((y_{i} - f_{i}) + \\sigma^{2} v)^3}
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data :param y: data
:type y: Nx1 array :type y: Nx1 array
:param f: latent variables f
:type f: Nx1 array
:param extra_data: extra_data which is not used in student t distribution - not used :param extra_data: extra_data which is not used in student t distribution - not used
:returns: third derivative of likelihood evaluated at points f :returns: third derivative of likelihood evaluated at points f
:rtype: Nx1 array :rtype: Nx1 array
""" """
assert y.shape == f.shape assert y.shape == link_f.shape
e = y - f e = y - link_f
d3lik_d3f = ( -(2*(self.v + 1)*(-e)*(e**2 - 3*self.v*self.sigma2)) / d3lik_dlink3 = ( -(2*(self.v + 1)*(-e)*(e**2 - 3*self.v*self.sigma2)) /
((e**2 + self.sigma2*self.v)**3) ((e**2 + self.sigma2*self.v)**3)
) )
return d3lik_d3f return d3lik_dlink3
def dlik_dvar(self, y, f, extra_data=None): def dlogpdf_dvar(self, link_f, y, extra_data=None):
""" """
Gradient of the log-likelihood function at y given f, w.r.t variance parameter (t_noise) Gradient of the log-likelihood function at y given f, w.r.t variance parameter (t_noise)
.. math:: .. math::
\\frac{d \\ln p(y_{i}|f_{i})}{d\\sigma^{2}} = \\frac{v((y_{i} - f_{i})^{2} - \\sigma^{2})}{2\\sigma^{2}(\\sigma^{2}v + (y_{i} - f_{i})^{2})} \\frac{d \\ln p(y_{i}|f_{i})}{d\\sigma^{2}} = \\frac{v((y_{i} - f_{i})^{2} - \\sigma^{2})}{2\\sigma^{2}(\\sigma^{2}v + (y_{i} - f_{i})^{2})}
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data :param y: data
:type y: Nx1 array :type y: Nx1 array
:param f: latent variables f
:type f: Nx1 array
:param extra_data: extra_data which is not used in student t distribution - not used :param extra_data: extra_data which is not used in student t distribution - not used
:returns: derivative of likelihood evaluated at points f w.r.t variance parameter :returns: derivative of likelihood evaluated at points f w.r.t variance parameter
:rtype: float :rtype: float
""" """
assert y.shape == f.shape assert y.shape == link_f.shape
e = y - f e = y - link_f
dlik_dvar = self.v*(e**2 - self.sigma2)/(2*self.sigma2*(self.sigma2*self.v + e**2)) dlogpdf_dvar = self.v*(e**2 - self.sigma2)/(2*self.sigma2*(self.sigma2*self.v + e**2))
#FIXME: May not want to sum over all dimensions if using many D? #FIXME: Careful as this hasn't been chained with dlink_var, not sure if we want link functions on our parameters?! Shouldn't need them with constraints
return np.sum(dlik_dvar) return np.sum(dlogpdf_dvar)
def dlik_df_dvar(self, y, f, extra_data=None): def dlogpdf_dlink_dvar(self, link_f, y, extra_data=None):
""" """
Derivative of the dlik_df w.r.t variance parameter (t_noise) Derivative of the dlogpdf_dlink w.r.t variance parameter (t_noise)
.. math:: .. math::
\\frac{d}{d\\sigma^{2}}(\\frac{d \\ln p(y_{i}|f_{i})}{df}) = \\frac{-2\\sigma v(v + 1)(y_{i}-f_{i})}{(y_{i}-f_{i})^2 + \\sigma^2 v)^2} \\frac{d}{d\\sigma^{2}}(\\frac{d \\ln p(y_{i}|f_{i})}{df}) = \\frac{-2\\sigma v(v + 1)(y_{i}-f_{i})}{(y_{i}-f_{i})^2 + \\sigma^2 v)^2}
:param link_f: latent variables link_f
:type link_f: Nx1 array
:param y: data :param y: data
:type y: Nx1 array :type y: Nx1 array
:param f: latent variables f
:type f: Nx1 array
:param extra_data: extra_data which is not used in student t distribution - not used :param extra_data: extra_data which is not used in student t distribution - not used
:returns: derivative of likelihood evaluated at points f w.r.t variance parameter :returns: derivative of likelihood evaluated at points f w.r.t variance parameter
:rtype: Nx1 array :rtype: Nx1 array
""" """
assert y.shape == f.shape assert y.shape == link_f.shape
e = y - f e = y - link_f
dlik_grad_dvar = (self.v*(self.v+1)*(-e))/((self.sigma2*self.v + e**2)**2) dlogpdf_dlink_dvar = (self.v*(self.v+1)*(-e))/((self.sigma2*self.v + e**2)**2)
return dlik_grad_dvar return dlogpdf_dlink_dvar
def d2lik_d2f_dvar(self, y, f, extra_data=None): def d2logpdf_dlink2_dvar(self, link_f, y, extra_data=None):
""" """
Gradient of the hessian (d2lik_d2f) w.r.t variance parameter (t_noise) Gradient of the hessian (d2logpdf_dlink2) w.r.t variance parameter (t_noise)
.. math:: .. math::
\\frac{d}{d\\sigma^{2}}(\\frac{d^{2} \\ln p(y_{i}|f_{i})}{d^{2}f}) = \\frac{v(v+1)(\\sigma^{2}v - 3(y_{i} - f_{i})^{2})}{(\\sigma^{2}v + (y_{i} - f_{i})^{2})^{3}} \\frac{d}{d\\sigma^{2}}(\\frac{d^{2} \\ln p(y_{i}|f_{i})}{d^{2}f}) = \\frac{v(v+1)(\\sigma^{2}v - 3(y_{i} - f_{i})^{2})}{(\\sigma^{2}v + (y_{i} - f_{i})^{2})^{3}}
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data :param y: data
:type y: Nx1 array :type y: Nx1 array
:param f: latent variables f
:type f: Nx1 array
:param extra_data: extra_data which is not used in student t distribution - not used :param extra_data: extra_data which is not used in student t distribution - not used
:returns: derivative of hessian evaluated at points f and f_j w.r.t variance parameter :returns: derivative of hessian evaluated at points f and f_j w.r.t variance parameter
:rtype: Nx1 array :rtype: Nx1 array
""" """
assert y.shape == f.shape assert y.shape == link_f.shape
e = y - f e = y - link_f
dlik_hess_dvar = ( (self.v*(self.v+1)*(self.sigma2*self.v - 3*(e**2))) d2logpdf_dlink2_dvar = ( (self.v*(self.v+1)*(self.sigma2*self.v - 3*(e**2)))
/ ((self.sigma2*self.v + (e**2))**3) / ((self.sigma2*self.v + (e**2))**3)
) )
return dlik_hess_dvar return d2logpdf_dlink2_dvar
def _laplace_gradients(self, y, f, extra_data=None): def _laplace_gradients(self, y, f, extra_data=None):
#must be listed in same order as 'get_param_names' #must be listed in same order as 'get_param_names'
derivs = ([self.dlik_dvar(y, f, extra_data=extra_data)], derivs = ([self.dlogpdf_dvar(f, y, extra_data=extra_data)],
[self.dlik_df_dvar(y, f, extra_data=extra_data)], [self.dlogpdf_dlink_dvar(f, y, extra_data=extra_data)],
[self.d2lik_d2f_dvar(y, f, extra_data=extra_data)] [self.d2logpdf_dlink2_dvar(f, y, extra_data=extra_data)]
) # lists as we might learn many parameters ) # lists as we might learn many parameters
# ensure we have gradients for every parameter we want to optimize # ensure we have gradients for every parameter we want to optimize
assert len(derivs[0]) == len(self._get_param_names()) assert len(derivs[0]) == len(self._get_param_names())

View file

@ -89,91 +89,124 @@ class LaplaceTests(unittest.TestCase):
self.f = None self.f = None
self.X = None self.X = None
def test_lik_mass(self): def test_mass_logpdf(self):
print "\n{}".format(inspect.stack()[0][3]) print "\n{}".format(inspect.stack()[0][3])
np.testing.assert_almost_equal( np.testing.assert_almost_equal(
np.sum(self.gauss._nlog_mass(self.f.copy(), self.Y.copy())), np.log(self.gauss._mass(self.f.copy(), self.Y.copy())),
-self.gauss.lik_function(self.Y.copy(), self.f.copy())) self.gauss.logpdf(self.f.copy(), self.Y.copy()))
def test_mass_nlog_mass(self):
""" dGauss_df's """
@unittest.skip("Not Implemented Yet")
def test_gaussian_dlogpdf_df(self):
#FIXME: Needs non-identity Link function
print "\n{}".format(inspect.stack()[0][3]) print "\n{}".format(inspect.stack()[0][3])
np.testing.assert_almost_equal( logpdf = functools.partial(self.gauss.logpdf, y=self.Y)
-np.log(self.gauss._mass(self.f.copy(), self.Y.copy())), dlogpdf_df = functools.partial(self.gauss.dlogpdf_df, y=self.Y)
self.gauss._nlog_mass(self.f.copy(), self.Y.copy())) grad = GradientChecker(logpdf, dlogpdf_df, self.f.copy(), 'g')
def test_mass_dnlog_mass_dgp_ndlik_df(self):
print "\n{}".format(inspect.stack()[0][3])
np.testing.assert_almost_equal(
self.gauss._dnlog_mass_dgp(gp=self.f.copy(), obs=self.Y.copy()),
-self.gauss.dlik_df(y=self.Y.copy(), f=self.f.copy()))
def test_mass_d2nlog_mass_dgp2_nd2lik_d2f(self):
print "\n{}".format(inspect.stack()[0][3])
np.testing.assert_almost_equal(
self.gauss._d2nlog_mass_dgp2(gp=self.f.copy(), obs=self.Y.copy()),
-self.gauss.d2lik_d2f(y=self.Y.copy(), f=self.f.copy()))
def test_mass_d2nlog_mass_dgp3_nd2lik_d3f(self):
print "\n{}".format(inspect.stack()[0][3])
np.testing.assert_almost_equal(
self.gauss._d3nlog_mass_dgp3(gp=self.f.copy(), obs=self.Y.copy()),
-self.gauss.d3lik_d3f(y=self.Y.copy(), f=self.f.copy()))
def test_gaussian_dnlog_mass_dgp(self):
print "\n{}".format(inspect.stack()[0][3])
link = functools.partial(self.gauss._nlog_mass, obs=self.Y)
dlik_df = functools.partial(self.gauss._dnlog_mass_dgp, obs=self.Y)
grad = GradientChecker(link, dlik_df, self.f.copy(), 'g')
grad.randomize() grad.randomize()
grad.checkgrad(verbose=1) grad.checkgrad(verbose=1)
self.assertTrue(grad.checkgrad()) self.assertTrue(grad.checkgrad())
def test_gaussian_d2nlog_mass_d2gp(self): @unittest.skip("Not Implemented Yet")
def test_gaussian_d2logpdf_df2(self):
#FIXME: Needs non-identity Link function
print "\n{}".format(inspect.stack()[0][3]) print "\n{}".format(inspect.stack()[0][3])
link = functools.partial(self.gauss._dnlog_mass_dgp, obs=self.Y) dlogpdf_df = functools.partial(self.gauss.dlogpdf_df, y=self.Y)
dlik_df = functools.partial(self.gauss._d2nlog_mass_dgp2, obs=self.Y) d2logpdf_df2 = functools.partial(self.gauss.d2logpdf_df2, y=self.Y)
grad = GradientChecker(link, dlik_df, self.f.copy(), 'g') grad = GradientChecker(dlogpdf_df, d2logpdf_df2, self.f.copy(), 'g')
grad.randomize() grad.randomize()
grad.checkgrad(verbose=1) grad.checkgrad(verbose=1)
self.assertTrue(grad.checkgrad()) self.assertTrue(grad.checkgrad())
def test_gaussian_d3nlog_mass_d3gp(self): @unittest.skip("Not Implemented Yet")
def test_gaussian_d3logpdf_df3(self):
#FIXME: Needs non-identity Link function
print "\n{}".format(inspect.stack()[0][3]) print "\n{}".format(inspect.stack()[0][3])
link = functools.partial(self.gauss._d2nlog_mass_dgp2, obs=self.Y) d2logpdf_df2 = functools.partial(self.gauss.d2logpdf_df2, y=self.Y)
dlik_df = functools.partial(self.gauss._d3nlog_mass_dgp3, obs=self.Y) d3logpdf_df3 = functools.partial(self.gauss.d3logpdf_df3, y=self.Y)
grad = GradientChecker(link, dlik_df, self.f.copy(), 'g') grad = GradientChecker(d2logpdf_df2, d3logpdf_df3, self.f.copy(), 'g')
grad.randomize() grad.randomize()
grad.checkgrad(verbose=1) grad.checkgrad(verbose=1)
self.assertTrue(grad.checkgrad()) self.assertTrue(grad.checkgrad())
def test_gaussian_dnlog_mass_dvar(self): @unittest.skip("Not Implemented Yet")
def test_gaussian_dlogpdf_df_dvar(self):
#FIXME: Needs non-identity Link function
print "\n{}".format(inspect.stack()[0][3]) print "\n{}".format(inspect.stack()[0][3])
self.assertTrue( self.assertTrue(
dparam_checkgrad(self.gauss._nlog_mass, self.gauss._dnlog_mass_dvar, dparam_checkgrad(self.gauss.dlogpdf_df, self.gauss.dlogpdf_df_dvar,
[self.var], args=(self.Y, self.f), constrain_positive=True, [self.var], args=(self.f, self.Y), constrain_positive=True,
randomize=False, verbose=True) randomize=False, verbose=True)
) )
def test_gaussian_dnlog_mass_dgp_dvar(self): @unittest.skip("Not Implemented Yet")
def test_gaussian_d2logpdf2_df2_dvar(self):
#FIXME: Needs non-identity Link function
print "\n{}".format(inspect.stack()[0][3]) print "\n{}".format(inspect.stack()[0][3])
self.assertTrue( self.assertTrue(
dparam_checkgrad(self.gauss._dnlog_mass_dgp, self.gauss._dnlog_mass_dgp_dvar, dparam_checkgrad(self.gauss.d2logpdf_df2, self.gauss.d2logpdf_df2_dvar,
[self.var], args=(self.Y, self.f), constrain_positive=True, [self.var], args=(self.f, self.Y), constrain_positive=True,
randomize=False, verbose=True) randomize=False, verbose=True)
) )
def test_gaussian_d2nlog_mass_d2gp_dvar(self):
""" dGauss_dlink's """
def test_gaussian_dlogpdf_dlink(self):
print "\n{}".format(inspect.stack()[0][3])
logpdf = functools.partial(self.gauss.logpdf, y=self.Y)
dlogpdf_dlink = functools.partial(self.gauss.dlogpdf_dlink, y=self.Y)
grad = GradientChecker(logpdf, dlogpdf_dlink, self.f.copy(), 'g')
grad.randomize()
grad.checkgrad(verbose=1)
self.assertTrue(grad.checkgrad())
def test_gaussian_d2logpdf_dlink2(self):
print "\n{}".format(inspect.stack()[0][3])
dlogpdf_dlink = functools.partial(self.gauss.dlogpdf_dlink, y=self.Y)
d2logpdf_dlink2 = functools.partial(self.gauss.d2logpdf_dlink2, y=self.Y)
grad = GradientChecker(dlogpdf_dlink, d2logpdf_dlink2, self.f.copy(), 'g')
grad.randomize()
grad.checkgrad(verbose=1)
self.assertTrue(grad.checkgrad())
def test_gaussian_d3logpdf_dlink3(self):
print "\n{}".format(inspect.stack()[0][3])
d2logpdf_dlink2 = functools.partial(self.gauss.d2logpdf_dlink2, y=self.Y)
d3logpdf_dlink3 = functools.partial(self.gauss.d3logpdf_dlink3, y=self.Y)
grad = GradientChecker(d2logpdf_dlink2, d3logpdf_dlink3, self.f.copy(), 'g')
grad.randomize()
grad.checkgrad(verbose=1)
self.assertTrue(grad.checkgrad())
def test_gaussian_dlogpdf_dvar(self):
print "\n{}".format(inspect.stack()[0][3]) print "\n{}".format(inspect.stack()[0][3])
self.assertTrue( self.assertTrue(
dparam_checkgrad(self.gauss._d2nlog_mass_dgp2, self.gauss._d2nlog_mass_dgp2_dvar, dparam_checkgrad(self.gauss.logpdf, self.gauss.dlogpdf_dvar,
[self.var], args=(self.Y, self.f), constrain_positive=True, [self.var], args=(self.f, self.Y), constrain_positive=True,
randomize=False, verbose=True) randomize=False, verbose=True)
) )
def test_gaussian_dlogpdf_dlink_dvar(self):
print "\n{}".format(inspect.stack()[0][3])
self.assertTrue(
dparam_checkgrad(self.gauss.dlogpdf_dlink, self.gauss.dlogpdf_dlink_dvar,
[self.var], args=(self.f, self.Y), constrain_positive=True,
randomize=False, verbose=True)
)
def test_gaussian_d2logpdf2_dlink2_dvar(self):
print "\n{}".format(inspect.stack()[0][3])
self.assertTrue(
dparam_checkgrad(self.gauss.d2logpdf_dlink2, self.gauss.d2logpdf_dlink2_dvar,
[self.var], args=(self.f, self.Y), constrain_positive=True,
randomize=False, verbose=True)
)
""" Gradchecker fault """ """ Gradchecker fault """
@unittest.expectedFailure @unittest.expectedFailure
def test_gaussian_d2lik_d2f_2(self): def test_gaussian_d2logpdf_df2_2(self):
print "\n{}".format(inspect.stack()[0][3]) print "\n{}".format(inspect.stack()[0][3])
self.Y = None self.Y = None
self.gauss = None self.gauss = None
@ -187,99 +220,121 @@ class LaplaceTests(unittest.TestCase):
self.f = np.random.rand(self.N, 1) self.f = np.random.rand(self.N, 1)
self.gauss = GPy.likelihoods.gaussian(variance=self.var, D=self.D, N=self.N) self.gauss = GPy.likelihoods.gaussian(variance=self.var, D=self.D, N=self.N)
dlik_df = functools.partial(self.gauss._dnlog_mass_dgp, obs=self.Y) dlogpdf_df = functools.partial(self.gauss.dlogpdf_df, y=self.Y)
d2lik_d2f = functools.partial(self.gauss._d2nlog_mass_dgp2, obs=self.Y) d2logpdf_df2 = functools.partial(self.gauss.d2logpdf_df2, y=self.Y)
grad = GradientChecker(dlik_df, d2lik_d2f, self.f.copy(), 'f') grad = GradientChecker(dlogpdf_df, d2logpdf_df2, self.f.copy(), 'g')
grad.randomize()
grad.checkgrad(verbose=1)
grad.checkgrad()
self.assertTrue(grad.checkgrad())
def test_gaussian_d3lik_d3f(self):
print "\n{}".format(inspect.stack()[0][3])
d2lik_d2f = functools.partial(self.gauss.d2lik_d2f, self.Y)
d3lik_d3f = functools.partial(self.gauss.d3lik_d3f, self.Y)
grad = GradientChecker(d2lik_d2f, d3lik_d3f, self.f.copy(), 'f')
grad.randomize() grad.randomize()
grad.checkgrad(verbose=1) grad.checkgrad(verbose=1)
self.assertTrue(grad.checkgrad()) self.assertTrue(grad.checkgrad())
def test_gaussian_dlik_dvar(self): """ dStudentT_df's """
@unittest.skip("Not Implemented Yet")
def test_studentt_dlogpdf_df(self):
#FIXME: Needs non-identity Link function
print "\n{}".format(inspect.stack()[0][3]) print "\n{}".format(inspect.stack()[0][3])
self.assertTrue( link = functools.partial(self.stu_t.logpdf, y=self.Y)
dparam_checkgrad(self.gauss.lik_function, self.gauss.dlik_dvar, dlogpdf_df = functools.partial(self.stu_t.dlogpdf_df, y=self.Y)
[self.var], args=(self.Y, self.f), constrain_positive=True, grad = GradientChecker(link, dlogpdf_df, self.f.copy(), 'f')
randomize=False, verbose=True)
)
def test_gaussian_dlik_df_dvar(self):
print "\n{}".format(inspect.stack()[0][3])
self.assertTrue(
dparam_checkgrad(self.gauss.dlik_df, self.gauss.dlik_df_dvar,
[self.var], args=(self.Y.copy(), self.f.copy()), constrain_positive=True,
randomize=False, verbose=True)
)
def test_gaussian_d2lik_d2f_dvar(self):
print "\n{}".format(inspect.stack()[0][3])
self.assertTrue(
dparam_checkgrad(self.gauss.d2lik_d2f, self.gauss.d2lik_d2f_dvar,
[self.var], args=(self.Y, self.f.copy()), constrain_positive=True,
randomize=True, verbose=True)
)
def test_studentt_dlik_df(self):
print "\n{}".format(inspect.stack()[0][3])
link = functools.partial(self.stu_t.lik_function, self.Y)
dlik_df = functools.partial(self.stu_t.dlik_df, self.Y)
grad = GradientChecker(link, dlik_df, self.f.copy(), 'f')
grad.randomize() grad.randomize()
grad.checkgrad(verbose=1) grad.checkgrad(verbose=1)
self.assertTrue(grad.checkgrad()) self.assertTrue(grad.checkgrad())
def test_studentt_d2lik_d2f(self): @unittest.skip("Not Implemented Yet")
def test_studentt_d2logpdf_df2(self):
#FIXME: Needs non-identity Link function
print "\n{}".format(inspect.stack()[0][3]) print "\n{}".format(inspect.stack()[0][3])
dlik_df = functools.partial(self.stu_t.dlik_df, self.Y) dlogpdf_df = functools.partial(self.stu_t.dlogpdf_df, y=self.Y)
d2lik_d2f = functools.partial(self.stu_t.d2lik_d2f, self.Y) d2logpdf_df2 = functools.partial(self.stu_t.d2logpdf_df2, y=self.Y)
grad = GradientChecker(dlik_df, d2lik_d2f, self.f.copy(), 'f') grad = GradientChecker(dlogpdf_df, d2logpdf_df2, self.f.copy(), 'f')
grad.randomize() grad.randomize()
grad.checkgrad(verbose=1) grad.checkgrad(verbose=1)
self.assertTrue(grad.checkgrad()) self.assertTrue(grad.checkgrad())
@unittest.skip("Not Implemented Yet")
def test_studentt_d3lik_d3f(self): def test_studentt_d3lik_d3f(self):
#FIXME: Needs non-identity Link function
print "\n{}".format(inspect.stack()[0][3]) print "\n{}".format(inspect.stack()[0][3])
d2lik_d2f = functools.partial(self.stu_t.d2lik_d2f, self.Y) d2logpdf_df2 = functools.partial(self.stu_t.d2logpdf_d2f, y=self.Y)
d3lik_d3f = functools.partial(self.stu_t.d3lik_d3f, self.Y) d3logpdf_df3 = functools.partial(self.stu_t.d3logpdf_d3f, y=self.Y)
grad = GradientChecker(d2lik_d2f, d3lik_d3f, self.f.copy(), 'f') grad = GradientChecker(d2logpdf_df2, d3logpdf_df3, self.f.copy(), 'f')
grad.randomize() grad.randomize()
grad.checkgrad(verbose=1) grad.checkgrad(verbose=1)
self.assertTrue(grad.checkgrad()) self.assertTrue(grad.checkgrad())
def test_studentt_dlik_dvar(self): @unittest.skip("Not Implemented Yet")
def test_studentt_dlogpdf_df_dvar(self):
#FIXME: Needs non-identity Link function
print "\n{}".format(inspect.stack()[0][3]) print "\n{}".format(inspect.stack()[0][3])
self.assertTrue( self.assertTrue(
dparam_checkgrad(self.stu_t.lik_function, self.stu_t.dlik_dvar, dparam_checkgrad(self.stu_t.dlogpdf_df, self.stu_t.dlogpdf_df_dvar,
[self.var], args=(self.Y.copy(), self.f.copy()), [self.var], args=(self.Y.copy(), self.f.copy()),
constrain_positive=True, randomize=True, verbose=True) constrain_positive=True, randomize=True, verbose=True)
) )
def test_studentt_dlik_df_dvar(self): @unittest.skip("Not Implemented Yet")
def test_studentt_d2logpdf_df2_dvar(self):
#FIXME: Needs non-identity Link function
print "\n{}".format(inspect.stack()[0][3]) print "\n{}".format(inspect.stack()[0][3])
self.assertTrue( self.assertTrue(
dparam_checkgrad(self.stu_t.dlik_df, self.stu_t.dlik_df_dvar, dparam_checkgrad(self.stu_t.d2logpdf_df2, self.stu_t.d2logpdf_df2_dvar,
[self.var], args=(self.Y.copy(), self.f.copy()), [self.var], args=(self.Y.copy(), self.f.copy()),
constrain_positive=True, randomize=True, verbose=True) constrain_positive=True, randomize=True, verbose=True)
) )
def test_studentt_d2lik_d2f_dvar(self): """ dStudentT_dlink's """
def test_studentt_dlogpdf_dlink(self):
print "\n{}".format(inspect.stack()[0][3])
logpdf = functools.partial(self.stu_t.logpdf, y=self.Y)
dlogpdf_dlink = functools.partial(self.stu_t.dlogpdf_dlink, y=self.Y)
grad = GradientChecker(logpdf, dlogpdf_dlink, self.f.copy(), 'f')
grad.randomize()
grad.checkgrad(verbose=1)
self.assertTrue(grad.checkgrad())
def test_studentt_d2logpdf_dlink2(self):
print "\n{}".format(inspect.stack()[0][3])
dlogpdf_dlink = functools.partial(self.stu_t.dlogpdf_dlink, y=self.Y)
d2logpdf_dlink2 = functools.partial(self.stu_t.d2logpdf_dlink2, y=self.Y)
grad = GradientChecker(dlogpdf_dlink, d2logpdf_dlink2, self.f.copy(), 'f')
grad.randomize()
grad.checkgrad(verbose=1)
self.assertTrue(grad.checkgrad())
def test_studentt_d3logpdf_dlink3(self):
print "\n{}".format(inspect.stack()[0][3])
d2logpdf_dlink2 = functools.partial(self.stu_t.d2logpdf_dlink2, y=self.Y)
d3logpdf_dlink3 = functools.partial(self.stu_t.d3logpdf_dlink3, y=self.Y)
grad = GradientChecker(d2logpdf_dlink2, d3logpdf_dlink3, self.f.copy(), 'f')
grad.randomize()
grad.checkgrad(verbose=1)
self.assertTrue(grad.checkgrad())
def test_studentt_dlogpdf_dvar(self):
print "\n{}".format(inspect.stack()[0][3]) print "\n{}".format(inspect.stack()[0][3])
self.assertTrue( self.assertTrue(
dparam_checkgrad(self.stu_t.d2lik_d2f, self.stu_t.d2lik_d2f_dvar, dparam_checkgrad(self.stu_t.logpdf, self.stu_t.dlogpdf_dvar,
[self.var], args=(self.Y.copy(), self.f.copy()), [self.var], args=(self.Y.copy(), self.f.copy()),
constrain_positive=True, randomize=True, verbose=True) constrain_positive=True, randomize=True, verbose=True)
) )
def test_studentt_dlogpdf_dlink_dvar(self):
print "\n{}".format(inspect.stack()[0][3])
self.assertTrue(
dparam_checkgrad(self.stu_t.dlogpdf_dlink, self.stu_t.dlogpdf_dlink_dvar,
[self.var], args=(self.Y.copy(), self.f.copy()),
constrain_positive=True, randomize=True, verbose=True)
)
def test_studentt_d2logpdf_dlink2_dvar(self):
print "\n{}".format(inspect.stack()[0][3])
self.assertTrue(
dparam_checkgrad(self.stu_t.d2logpdf_dlink2, self.stu_t.d2logpdf_dlink2_dvar,
[self.var], args=(self.Y.copy(), self.f.copy()),
constrain_positive=True, randomize=True, verbose=True)
)
""" Grad check whole models (grad checking Laplace not just noise models """
def test_gauss_rbf(self): def test_gauss_rbf(self):
print "\n{}".format(inspect.stack()[0][3]) print "\n{}".format(inspect.stack()[0][3])
self.Y = self.Y/self.Y.max() self.Y = self.Y/self.Y.max()