From 96f189113ac037bbb709535c9c75997571c225f6 Mon Sep 17 00:00:00 2001 From: Alan Saul Date: Tue, 15 Oct 2013 12:25:19 +0100 Subject: [PATCH] Started on chaining, must remember to chain _laplace_gradients aswell! --- GPy/likelihoods/laplace.py | 14 +- .../noise_models/gaussian_noise.py | 155 +++++----- .../noise_models/student_t_noise.py | 126 +++++---- GPy/testing/laplace_tests.py | 265 +++++++++++------- 4 files changed, 325 insertions(+), 235 deletions(-) diff --git a/GPy/likelihoods/laplace.py b/GPy/likelihoods/laplace.py index 26365467..f4233554 100644 --- a/GPy/likelihoods/laplace.py +++ b/GPy/likelihoods/laplace.py @@ -76,7 +76,7 @@ class Laplace(likelihood): return self.noise_model._set_params(p) def _shared_gradients_components(self): - d3lik_d3fhat = -self.noise_model._d3nlog_mass_dgp3(self.f_hat, self.data, extra_data=self.extra_data) + d3lik_d3fhat = self.noise_model.d3logpdf_df3(self.f_hat, self.data, extra_data=self.extra_data) dL_dfhat = 0.5*(np.diag(self.Ki_W_i)[:, None]*d3lik_d3fhat).T #why isn't this -0.5? I_KW_i = np.eye(self.N) - np.dot(self.K, self.Wi_K_i) return dL_dfhat, I_KW_i @@ -89,7 +89,7 @@ class Laplace(likelihood): :rtype: Matrix (1 x num_kernel_params) """ dL_dfhat, I_KW_i = self._shared_gradients_components() - dlp = -self.noise_model._dnlog_mass_dgp(self.data, self.f_hat) + dlp = self.noise_model.dlogpdf_df(self.f_hat, self.data) #Explicit #expl_a = np.dot(self.Ki_f, self.Ki_f.T) @@ -178,7 +178,7 @@ class Laplace(likelihood): self.Wi_K_i = self.W12BiW12 self.ln_det_Wi_K = pddet(self.Sigma_tilde + self.K) - self.lik = -self.noise_model._nlog_mass(self.f_hat, self.data, extra_data=self.extra_data) + self.lik = self.noise_model.logpdf(self.f_hat, self.data, extra_data=self.extra_data) self.y_Wi_Ki_i_y = mdot(Y_tilde.T, self.Wi_K_i, Y_tilde) Z_tilde = (+ self.lik @@ -223,7 +223,7 @@ class Laplace(likelihood): Compute the variables required to compute gaussian Y variables """ #At this point get the hessian matrix (or vector as W is diagonal) - self.W = -self.noise_model.d2lik_d2f(self.data, self.f_hat, extra_data=self.extra_data) + self.W = -self.noise_model.d2logpdf_df2(self.f_hat, self.data, extra_data=self.extra_data) #TODO: Could save on computation when using rasm by returning these, means it isn't just a "mode finder" though self.W12BiW12, self.ln_B_det = self._compute_B_statistics(self.K, self.W, np.eye(self.N)) @@ -290,7 +290,7 @@ class Laplace(likelihood): old_obj = np.inf def obj(Ki_f, f): - return -0.5*np.dot(Ki_f.T, f) - self.noise_model._nlog_mass(f, self.data, extra_data=self.extra_data) + return -0.5*np.dot(Ki_f.T, f) + self.noise_model.logpdf(f, self.data, extra_data=self.extra_data) difference = np.inf epsilon = 1e-6 @@ -299,10 +299,10 @@ class Laplace(likelihood): i = 0 while difference > epsilon and i < MAX_ITER: - W = -self.noise_model.d2lik_d2f(self.data, f, extra_data=self.extra_data) + W = -self.noise_model.d2logpdf_df2(f, self.data, extra_data=self.extra_data) W_f = W*f - grad = -self.noise_model._dnlog_mass_dgp(f, self.data, extra_data=self.extra_data) + grad = self.noise_model.dlogpdf_df(f, self.data, extra_data=self.extra_data) b = W_f + grad W12BiW12Kb, _ = self._compute_B_statistics(K, W.copy(), np.dot(K, b)) diff --git a/GPy/likelihoods/noise_models/gaussian_noise.py b/GPy/likelihoods/noise_models/gaussian_noise.py index 51b7c6a1..7b2e1a85 100644 --- a/GPy/likelihoods/noise_models/gaussian_noise.py +++ b/GPy/likelihoods/noise_models/gaussian_noise.py @@ -80,63 +80,82 @@ class Gaussian(NoiseDistribution): def _predictive_variance_analytical(self,mu,sigma,predictive_mean=None): return 1./(1./self.variance + 1./sigma**2) - def _mass(self, gp, obs): + def _mass(self, link_f, y): + #FIXME: Careful now passing link_f in not gp (f)! #return std_norm_pdf( (self.gp_link.transf(gp)-obs)/np.sqrt(self.variance) ) #Assumes no covariance, exp, sum, log for numerical stability - return np.exp(np.sum(np.log(stats.norm.pdf(obs,self.gp_link.transf(gp),np.sqrt(self.variance))))) + #return np.exp(np.sum(np.log(stats.norm.pdf(obs,self.gp_link.transf(gp),np.sqrt(self.variance))))) + #return np.exp(np.sum(np.log(stats.norm.pdf(y, link_f, np.sqrt(self.variance))))) + return np.exp(np.sum(np.log(stats.norm.pdf(y, link_f, np.sqrt(self.variance))))) - def _nlog_mass(self, gp, obs, extra_data=None): + def _nlog_mass(self, link_f, y, extra_data=None): + NotImplementedError("Deprecated, now doing chain in likelihood.py for link function evaluation\ + Please negate your function and use logpdf in noise_model.py, if implementing a likelihood\ + rederivate the derivative without doing the chain and put in logpdf, dlogpdf_dlink or\ + its derivatives") + + def _dnlog_mass_dgp(self, link_f, y, extra_data=None): + NotImplementedError("Deprecated, now doing chain in likelihood.py for link function evaluation\ + Please negate your function and use logpdf in noise_model.py, if implementing a likelihood\ + rederivate the derivative without doing the chain and put in logpdf, dlogpdf_dlink or\ + its derivatives") + + def _d2nlog_mass_dgp2(self, link_f, y, extra_data=None): + NotImplementedError("Deprecated, now doing chain in likelihood.py for link function evaluation\ + Please negate your function and use logpdf in noise_model.py, if implementing a likelihood\ + rederivate the derivative without doing the chain and put in logpdf, dlogpdf_dlink or\ + its derivatives") + + def logpdf(self, link_f, y, extra_data=None): """ - Negative Log likelihood function - Chained with link function deriative + Log likelihood function .. math:: - \\-ln p(y_{i}|\\lambda(f_{i})) = +\\frac{D \\ln 2\\pi}{2} + \\frac{\\ln |K|}{2} + \\frac{(y_{i} - \\lambda(f_{i}))^{T}\\sigma^{-2}(y_{i} - \\lambda(f_{i}))}{2} + \\ln p(y_{i}|\\lambda(f_{i})) = -\\frac{N \\ln 2\\pi}{2} - \\frac{\\ln |K|}{2} - \\frac{(y_{i} - \\lambda(f_{i}))^{T}\\sigma^{-2}(y_{i} - \\lambda(f_{i}))}{2} - :param gp: latent variables (f) - :type gp: Nx1 array - :param obs: data (y) - :type obs: Nx1 array + :param link_f: latent variables link(f) + :type link_f: Nx1 array + :param y: data + :type y: Nx1 array :param extra_data: extra_data which is not used in student t distribution - not used :returns: likelihood evaluated for this point :rtype: float """ - assert gp.shape == obs.shape - return .5*(np.sum((self.gp_link.transf(gp)-obs)**2/self.variance) + self.ln_det_K + self.N*np.log(2.*np.pi)) + assert link_f.shape == y.shape + return -0.5*(np.sum((y-link_f)**2/self.variance) + self.ln_det_K + self.N*np.log(2.*np.pi)) - def _dnlog_mass_dgp(self, gp, obs, extra_data=None): + def dlogpdf_dlink(self, link_f, y, extra_data=None): """ - Negative Gradient of the link function at y, given f w.r.t f - Chained with link function deriative + Gradient of the pdf at y, given link(f) w.r.t link(f) .. math:: \\frac{d \\ln p(y_{i}|f_{i})}{df} = \\frac{1}{\\sigma^{2}}(y_{i} - f_{i}) - \\frac{d \\-ln p(y_{i}|f_{i})}{df} = -\\frac{1}{\\sigma^{2}}(y_{i} - \\lambda(f_{i}))\\frac{d\\lambda(f_{i})}{df_{i}} - :param gp: latent variables (f) - :type gp: Nx1 array - :param obs: data (y) - :type obs: Nx1 array + :param link_f: latent variables link(f) + :type link_f: Nx1 array + :param y: data + :type y: Nx1 array :param extra_data: extra_data which is not used in student t distribution - not used :returns: gradient of negative likelihood evaluated at points :rtype: Nx1 array """ - assert gp.shape == obs.shape - return (self.gp_link.transf(gp)-obs)/self.variance * self.gp_link.dtransf_df(gp) + assert link_f.shape == y.shape + s2_i = (1.0/self.variance) + grad = s2_i*y - s2_i*link_f + return grad - def _d2nlog_mass_dgp2(self, gp, obs, extra_data=None): + def d2logpdf_dlink2(self, link_f, y, extra_data=None): """ - Negative Hessian at y, given f, w.r.t f the hessian will be 0 unless i == j + Hessian at y, given link_f, w.r.t link_f the hessian will be 0 unless i == j i.e. second derivative _nlog_mass at y given f_{i} f_{j} w.r.t f_{i} and f_{j} - Chained with link function deriative .. math:: \\frac{d^{2} \\ln p(y_{i}|f_{i})}{d^{2}f} = -\\frac{1}{\\sigma^{2}} - :param gp: latent variables (f) - :type gp: Nx1 array - :param obs: data (y) - :type obs: Nx1 array + :param link_f: latent variables link(f) + :type link_f: Nx1 array + :param y: data + :type y: Nx1 array :param extra_data: extra_data which is not used in student t distribution - not used :returns: Diagonal of hessian matrix (second derivative of likelihood evaluated at points f) :rtype: Nx1 array @@ -145,91 +164,89 @@ class Gaussian(NoiseDistribution): Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases (the distribution for y_{i} depends only on f_{i} not on f_{j!=i} """ - assert gp.shape == obs.shape - #FIXME: Why squared? - return ((self.gp_link.transf(gp)-obs)*self.gp_link.d2transf_df2(gp) + self.gp_link.dtransf_df(gp)**2)/self.variance + assert link_f.shape == y.shape + hess = -(1.0/self.variance)*np.ones((self.N, 1)) + return hess - def _d3nlog_mass_dgp3(self, gp, obs, extra_data=None): + def d3logpdf_dlink3(self, link_f, y, extra_data=None): """ - Third order derivative log-likelihood function at y given f w.r.t f - Chained with link function deriative + Third order derivative log-likelihood function at y given link(f) w.r.t link(f) .. math:: \\frac{d^{3} \\ln p(y_{i}|f_{i})}{d^{3}f} = 0 - :param gp: latent variables (f) - :type gp: Nx1 array - :param obs: data (y) - :type obs: Nx1 array + :param link_f: latent variables link(f) + :type link_f: Nx1 array + :param y: data + :type y: Nx1 array :param extra_data: extra_data which is not used in student t distribution - not used :returns: third derivative of likelihood evaluated at points f :rtype: Nx1 array """ - assert gp.shape == obs.shape - d2lambda_df2 = self.gp_link.d2transf_df2(gp) - return ((self.gp_link.transf(gp)-obs)*self.gp_link.d3transf_df3(gp) - self.gp_link.dtransf_df(gp)*d2lambda_df2 + d2lambda_df2)/self.variance + assert link_f.shape == y.shape + d3logpdf_dlink3 = np.diagonal(0*self.I)[:, None] # FIXME: CAREFUL THIS MAY NOT WORK WITH MULTIDIMENSIONS? + return d3logpdf_dlink3 - def _dnlog_mass_dvar(self, gp, obs, extra_data=None): + def dlogpdf_dvar(self, link_f, y, extra_data=None): """ - Gradient of the negative log-likelihood function at y given f, w.r.t variance parameter (noise_variance) + Gradient of the negative log-likelihood function at y given link(f), w.r.t variance parameter (noise_variance) .. math:: \\frac{d \\ln p(y_{i}|f_{i})}{d\\sigma^{2}} = \\frac{N}{2\\sigma^{2}} + \\frac{(y_{i} - f_{i})^{2}}{2\\sigma^{4}} - :param gp: latent variables (f) - :type gp: Nx1 array - :param obs: data (y) - :type obs: Nx1 array + :param link_f: latent variables link(f) + :type link_f: Nx1 array + :param y: data + :type y: Nx1 array :param extra_data: extra_data which is not used in student t distribution - not used :returns: derivative of likelihood evaluated at points f w.r.t variance parameter :rtype: float """ - assert gp.shape == obs.shape - e = (obs - self.gp_link.transf(gp)) + assert link_f.shape == y.shape + e = y - link_f s_4 = 1.0/(self.variance**2) - dnlik_dsigma = 0.5*self.N/self.variance - 0.5*s_4*np.dot(e.T, e) - return np.sum(dnlik_dsigma) # Sure about this sum? + dlik_dsigma = -0.5*self.N/self.variance + 0.5*s_4*np.dot(e.T, e) + return np.sum(dlik_dsigma) # Sure about this sum? - def _dnlog_mass_dgp_dvar(self, gp, obs, extra_data=None): + def dlogpdf_dlink_dvar(self, link_f, y, extra_data=None): """ - Derivative of the dlik_df w.r.t variance parameter (noise_variance) + Derivative of the dlogpdf_dlink w.r.t variance parameter (noise_variance) .. math:: \\frac{d}{d\\sigma^{2}}(\\frac{d \\ln p(y_{i}|f_{i})}{df}) = \\frac{1}{\\sigma^{4}}(-y_{i} + f_{i}) + :param link_f: latent variables link(f) + :type link_f: Nx1 array :param y: data :type y: Nx1 array - :param f: latent variables f - :type f: Nx1 array :param extra_data: extra_data which is not used in student t distribution - not used :returns: derivative of likelihood evaluated at points f w.r.t variance parameter :rtype: Nx1 array """ - assert gp.shape == obs.shape + assert link_f.shape == y.shape s_4 = 1.0/(self.variance**2) - dnlik_grad_dsigma = s_4*(obs - self.gp_link.transf(gp))*self.gp_link.dtransf_df(gp) - return dnlik_grad_dsigma + dlik_grad_dsigma = -np.dot(s_4*self.I, y) + np.dot(s_4*self.I, link_f) + return dlik_grad_dsigma - def _d2nlog_mass_dgp2_dvar(self, gp, obs, extra_data=None): + def d2logpdf_dlink2_dvar(self, link_f, y, extra_data=None): """ - Gradient of the hessian (d2lik_d2f) w.r.t variance parameter (noise_variance) + Gradient of the hessian (d2logpdf_dlink2) w.r.t variance parameter (noise_variance) .. math:: \\frac{d}{d\\sigma^{2}}(\\frac{d^{2} \\ln p(y_{i}|f_{i})}{d^{2}f}) = \\frac{1}{\\sigma^{4}} - :param gp: latent variables (f) - :type gp: Nx1 array - :param obs: data (y) - :type obs: Nx1 array + :param link_f: latent variables link(f) + :type link_f: Nx1 array + :param y: data + :type y: Nx1 array :param extra_data: extra_data which is not used in student t distribution - not used :returns: derivative of hessian evaluated at points f and f_j w.r.t variance parameter :rtype: Nx1 array """ - assert gp.shape == obs.shape + assert link_f.shape == y.shape s_4 = 1.0/(self.variance**2) - #FIXME: Why squared? - dnlik_hess_dvar = -s_4*((self.gp_link.transf(gp)-obs)*self.gp_link.d2transf_df2(gp) + self.gp_link.dtransf_df(gp)**2) - return dnlik_hess_dvar + d2logpdf_dlink2_dvar = np.diag(s_4*self.I)[:, None] + return d2logpdf_dlink2_dvar def _mean(self,gp): """ diff --git a/GPy/likelihoods/noise_models/student_t_noise.py b/GPy/likelihoods/noise_models/student_t_noise.py index c4319313..dcd41fda 100644 --- a/GPy/likelihoods/noise_models/student_t_noise.py +++ b/GPy/likelihoods/noise_models/student_t_noise.py @@ -40,64 +40,82 @@ class StudentT(NoiseDistribution): def variance(self, extra_data=None): return (self.v / float(self.v - 2)) * self.sigma2 - def _nlog_mass(self, gp, obs, extra_data=None): + def _nlog_mass(self, link_f, y, extra_data=None): + NotImplementedError("Deprecated, now doing chain in likelihood.py for link function evaluation\ + Please negate your function and use logpdf in noise_model.py, if implementing a likelihood\ + rederivate the derivative without doing the chain and put in logpdf, dlogpdf_dlink or\ + its derivatives") + + def _dnlog_mass_dgp(self, link_f, y, extra_data=None): + NotImplementedError("Deprecated, now doing chain in likelihood.py for link function evaluation\ + Please negate your function and use logpdf in noise_model.py, if implementing a likelihood\ + rederivate the derivative without doing the chain and put in logpdf, dlogpdf_dlink or\ + its derivatives") + + def _d2nlog_mass_dgp2(self, link_f, y, extra_data=None): + NotImplementedError("Deprecated, now doing chain in likelihood.py for link function evaluation\ + Please negate your function and use logpdf in noise_model.py, if implementing a likelihood\ + rederivate the derivative without doing the chain and put in logpdf, dlogpdf_dlink or\ + its derivatives") + + def logpdf(self, link_f, y, extra_data=None): """ Log Likelihood Function .. math:: \\ln p(y_{i}|f_{i}) = \\ln \\Gamma(\\frac{v+1}{2}) - \\ln \\Gamma(\\frac{v}{2})\\sqrt{v \\pi}\sigma - \\frac{v+1}{2}\\ln (1 + \\frac{1}{v}\\left(\\frac{y_{i} - f_{i}}{\\sigma}\\right)^2 - :param gp: latent variables (f) - :type gp: Nx1 array - :param obs: data (y) - :type obs: Nx1 array + :param link_f: latent variables (link(f)) + :type link_f: Nx1 array + :param y: data + :type y: Nx1 array :param extra_data: extra_data which is not used in student t distribution - not used :returns: likelihood evaluated for this point :rtype: float """ - assert gp.shape == obs.shape - e = obs - self.gp_link.transf(gp) + assert link_f.shape == y.shape + e = y - link_f objective = (+ gammaln((self.v + 1) * 0.5) - gammaln(self.v * 0.5) - 0.5*np.log(self.sigma2 * self.v * np.pi) - 0.5*(self.v + 1)*np.log(1 + (1/np.float(self.v))*((e**2)/self.sigma2)) ) - return -np.sum(objective) + return np.sum(objective) - def dlik_df(self, y, f, extra_data=None): + def dlogpdf_dlink(self, link_f, y, extra_data=None): """ - Gradient of the log likelihood function at y, given f w.r.t f + Gradient of the log likelihood function at y, given link(f) w.r.t link(f) .. math:: \\frac{d \\ln p(y_{i}|f_{i})}{df} = \\frac{(v+1)(y_{i}-f_{i})}{(y_{i}-f_{i})^{2} + \\sigma^{2}v} + :param link_f: latent variables (f) + :type link_f: Nx1 array :param y: data :type y: Nx1 array - :param f: latent variables f - :type f: Nx1 array :param extra_data: extra_data which is not used in student t distribution - not used :returns: gradient of likelihood evaluated at points :rtype: Nx1 array """ - assert y.shape == f.shape - e = y - f + assert y.shape == link_f.shape + e = y - link_f grad = ((self.v + 1) * e) / (self.v * self.sigma2 + (e**2)) return grad - def d2lik_d2f(self, y, f, extra_data=None): + def d2logpdf_dlink2(self, link_f, y, extra_data=None): """ - Hessian at y, given f, w.r.t f the hessian will be 0 unless i == j + Hessian at y, given link(f), w.r.t link(f) the hessian will be 0 unless i == j i.e. second derivative lik_function at y given f_{i} f_{j} w.r.t f_{i} and f_{j} .. math:: \\frac{d^{2} \\ln p(y_{i}|f_{i})}{d^{2}f} = \\frac{(v+1)((y_{i}-f_{i})^{2} - \\sigma^{2}v)}{((y_{i}-f_{i})^{2} + \\sigma^{2}v)^{2}} + :param link_f: latent variables link(f) + :type link_f: Nx1 array :param y: data :type y: Nx1 array - :param f: latent variables f - :type f: Nx1 array :param extra_data: extra_data which is not used in student t distribution - not used :returns: Diagonal of hessian matrix (second derivative of likelihood evaluated at points f) :rtype: Nx1 array @@ -106,101 +124,101 @@ class StudentT(NoiseDistribution): Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases (the distribution for y_{i} depends only on f_{i} not on f_{j!=i} """ - assert y.shape == f.shape - e = y - f + assert y.shape == link_f.shape + e = y - link_f hess = ((self.v + 1)*(e**2 - self.v*self.sigma2)) / ((self.sigma2*self.v + e**2)**2) return hess - def d3lik_d3f(self, y, f, extra_data=None): + def d3logpdf_dlink3(self, link_f, y, extra_data=None): """ Third order derivative log-likelihood function at y given f w.r.t f .. math:: \\frac{d^{3} \\ln p(y_{i}|f_{i})}{d^{3}f} = \\frac{-2(v+1)((y_{i} - f_{i})^3 - 3(y_{i} - f_{i}) \\sigma^{2} v))}{((y_{i} - f_{i}) + \\sigma^{2} v)^3} + :param link_f: latent variables link(f) + :type link_f: Nx1 array :param y: data :type y: Nx1 array - :param f: latent variables f - :type f: Nx1 array :param extra_data: extra_data which is not used in student t distribution - not used :returns: third derivative of likelihood evaluated at points f :rtype: Nx1 array """ - assert y.shape == f.shape - e = y - f - d3lik_d3f = ( -(2*(self.v + 1)*(-e)*(e**2 - 3*self.v*self.sigma2)) / + assert y.shape == link_f.shape + e = y - link_f + d3lik_dlink3 = ( -(2*(self.v + 1)*(-e)*(e**2 - 3*self.v*self.sigma2)) / ((e**2 + self.sigma2*self.v)**3) ) - return d3lik_d3f + return d3lik_dlink3 - def dlik_dvar(self, y, f, extra_data=None): + def dlogpdf_dvar(self, link_f, y, extra_data=None): """ Gradient of the log-likelihood function at y given f, w.r.t variance parameter (t_noise) .. math:: \\frac{d \\ln p(y_{i}|f_{i})}{d\\sigma^{2}} = \\frac{v((y_{i} - f_{i})^{2} - \\sigma^{2})}{2\\sigma^{2}(\\sigma^{2}v + (y_{i} - f_{i})^{2})} + :param link_f: latent variables link(f) + :type link_f: Nx1 array :param y: data :type y: Nx1 array - :param f: latent variables f - :type f: Nx1 array :param extra_data: extra_data which is not used in student t distribution - not used :returns: derivative of likelihood evaluated at points f w.r.t variance parameter :rtype: float """ - assert y.shape == f.shape - e = y - f - dlik_dvar = self.v*(e**2 - self.sigma2)/(2*self.sigma2*(self.sigma2*self.v + e**2)) - #FIXME: May not want to sum over all dimensions if using many D? - return np.sum(dlik_dvar) + assert y.shape == link_f.shape + e = y - link_f + dlogpdf_dvar = self.v*(e**2 - self.sigma2)/(2*self.sigma2*(self.sigma2*self.v + e**2)) + #FIXME: Careful as this hasn't been chained with dlink_var, not sure if we want link functions on our parameters?! Shouldn't need them with constraints + return np.sum(dlogpdf_dvar) - def dlik_df_dvar(self, y, f, extra_data=None): + def dlogpdf_dlink_dvar(self, link_f, y, extra_data=None): """ - Derivative of the dlik_df w.r.t variance parameter (t_noise) + Derivative of the dlogpdf_dlink w.r.t variance parameter (t_noise) .. math:: \\frac{d}{d\\sigma^{2}}(\\frac{d \\ln p(y_{i}|f_{i})}{df}) = \\frac{-2\\sigma v(v + 1)(y_{i}-f_{i})}{(y_{i}-f_{i})^2 + \\sigma^2 v)^2} + :param link_f: latent variables link_f + :type link_f: Nx1 array :param y: data :type y: Nx1 array - :param f: latent variables f - :type f: Nx1 array :param extra_data: extra_data which is not used in student t distribution - not used :returns: derivative of likelihood evaluated at points f w.r.t variance parameter :rtype: Nx1 array """ - assert y.shape == f.shape - e = y - f - dlik_grad_dvar = (self.v*(self.v+1)*(-e))/((self.sigma2*self.v + e**2)**2) - return dlik_grad_dvar + assert y.shape == link_f.shape + e = y - link_f + dlogpdf_dlink_dvar = (self.v*(self.v+1)*(-e))/((self.sigma2*self.v + e**2)**2) + return dlogpdf_dlink_dvar - def d2lik_d2f_dvar(self, y, f, extra_data=None): + def d2logpdf_dlink2_dvar(self, link_f, y, extra_data=None): """ - Gradient of the hessian (d2lik_d2f) w.r.t variance parameter (t_noise) + Gradient of the hessian (d2logpdf_dlink2) w.r.t variance parameter (t_noise) .. math:: \\frac{d}{d\\sigma^{2}}(\\frac{d^{2} \\ln p(y_{i}|f_{i})}{d^{2}f}) = \\frac{v(v+1)(\\sigma^{2}v - 3(y_{i} - f_{i})^{2})}{(\\sigma^{2}v + (y_{i} - f_{i})^{2})^{3}} + :param link_f: latent variables link(f) + :type link_f: Nx1 array :param y: data :type y: Nx1 array - :param f: latent variables f - :type f: Nx1 array :param extra_data: extra_data which is not used in student t distribution - not used :returns: derivative of hessian evaluated at points f and f_j w.r.t variance parameter :rtype: Nx1 array """ - assert y.shape == f.shape - e = y - f - dlik_hess_dvar = ( (self.v*(self.v+1)*(self.sigma2*self.v - 3*(e**2))) + assert y.shape == link_f.shape + e = y - link_f + d2logpdf_dlink2_dvar = ( (self.v*(self.v+1)*(self.sigma2*self.v - 3*(e**2))) / ((self.sigma2*self.v + (e**2))**3) ) - return dlik_hess_dvar + return d2logpdf_dlink2_dvar def _laplace_gradients(self, y, f, extra_data=None): #must be listed in same order as 'get_param_names' - derivs = ([self.dlik_dvar(y, f, extra_data=extra_data)], - [self.dlik_df_dvar(y, f, extra_data=extra_data)], - [self.d2lik_d2f_dvar(y, f, extra_data=extra_data)] + derivs = ([self.dlogpdf_dvar(f, y, extra_data=extra_data)], + [self.dlogpdf_dlink_dvar(f, y, extra_data=extra_data)], + [self.d2logpdf_dlink2_dvar(f, y, extra_data=extra_data)] ) # lists as we might learn many parameters # ensure we have gradients for every parameter we want to optimize assert len(derivs[0]) == len(self._get_param_names()) diff --git a/GPy/testing/laplace_tests.py b/GPy/testing/laplace_tests.py index 1154052e..936241b1 100644 --- a/GPy/testing/laplace_tests.py +++ b/GPy/testing/laplace_tests.py @@ -89,91 +89,124 @@ class LaplaceTests(unittest.TestCase): self.f = None self.X = None - def test_lik_mass(self): + def test_mass_logpdf(self): print "\n{}".format(inspect.stack()[0][3]) np.testing.assert_almost_equal( - np.sum(self.gauss._nlog_mass(self.f.copy(), self.Y.copy())), - -self.gauss.lik_function(self.Y.copy(), self.f.copy())) + np.log(self.gauss._mass(self.f.copy(), self.Y.copy())), + self.gauss.logpdf(self.f.copy(), self.Y.copy())) - def test_mass_nlog_mass(self): + + """ dGauss_df's """ + @unittest.skip("Not Implemented Yet") + def test_gaussian_dlogpdf_df(self): + #FIXME: Needs non-identity Link function print "\n{}".format(inspect.stack()[0][3]) - np.testing.assert_almost_equal( - -np.log(self.gauss._mass(self.f.copy(), self.Y.copy())), - self.gauss._nlog_mass(self.f.copy(), self.Y.copy())) - - def test_mass_dnlog_mass_dgp_ndlik_df(self): - print "\n{}".format(inspect.stack()[0][3]) - np.testing.assert_almost_equal( - self.gauss._dnlog_mass_dgp(gp=self.f.copy(), obs=self.Y.copy()), - -self.gauss.dlik_df(y=self.Y.copy(), f=self.f.copy())) - - def test_mass_d2nlog_mass_dgp2_nd2lik_d2f(self): - print "\n{}".format(inspect.stack()[0][3]) - np.testing.assert_almost_equal( - self.gauss._d2nlog_mass_dgp2(gp=self.f.copy(), obs=self.Y.copy()), - -self.gauss.d2lik_d2f(y=self.Y.copy(), f=self.f.copy())) - - def test_mass_d2nlog_mass_dgp3_nd2lik_d3f(self): - print "\n{}".format(inspect.stack()[0][3]) - np.testing.assert_almost_equal( - self.gauss._d3nlog_mass_dgp3(gp=self.f.copy(), obs=self.Y.copy()), - -self.gauss.d3lik_d3f(y=self.Y.copy(), f=self.f.copy())) - - - def test_gaussian_dnlog_mass_dgp(self): - print "\n{}".format(inspect.stack()[0][3]) - link = functools.partial(self.gauss._nlog_mass, obs=self.Y) - dlik_df = functools.partial(self.gauss._dnlog_mass_dgp, obs=self.Y) - grad = GradientChecker(link, dlik_df, self.f.copy(), 'g') + logpdf = functools.partial(self.gauss.logpdf, y=self.Y) + dlogpdf_df = functools.partial(self.gauss.dlogpdf_df, y=self.Y) + grad = GradientChecker(logpdf, dlogpdf_df, self.f.copy(), 'g') grad.randomize() grad.checkgrad(verbose=1) self.assertTrue(grad.checkgrad()) - def test_gaussian_d2nlog_mass_d2gp(self): + @unittest.skip("Not Implemented Yet") + def test_gaussian_d2logpdf_df2(self): + #FIXME: Needs non-identity Link function print "\n{}".format(inspect.stack()[0][3]) - link = functools.partial(self.gauss._dnlog_mass_dgp, obs=self.Y) - dlik_df = functools.partial(self.gauss._d2nlog_mass_dgp2, obs=self.Y) - grad = GradientChecker(link, dlik_df, self.f.copy(), 'g') + dlogpdf_df = functools.partial(self.gauss.dlogpdf_df, y=self.Y) + d2logpdf_df2 = functools.partial(self.gauss.d2logpdf_df2, y=self.Y) + grad = GradientChecker(dlogpdf_df, d2logpdf_df2, self.f.copy(), 'g') grad.randomize() grad.checkgrad(verbose=1) self.assertTrue(grad.checkgrad()) - def test_gaussian_d3nlog_mass_d3gp(self): + @unittest.skip("Not Implemented Yet") + def test_gaussian_d3logpdf_df3(self): + #FIXME: Needs non-identity Link function print "\n{}".format(inspect.stack()[0][3]) - link = functools.partial(self.gauss._d2nlog_mass_dgp2, obs=self.Y) - dlik_df = functools.partial(self.gauss._d3nlog_mass_dgp3, obs=self.Y) - grad = GradientChecker(link, dlik_df, self.f.copy(), 'g') + d2logpdf_df2 = functools.partial(self.gauss.d2logpdf_df2, y=self.Y) + d3logpdf_df3 = functools.partial(self.gauss.d3logpdf_df3, y=self.Y) + grad = GradientChecker(d2logpdf_df2, d3logpdf_df3, self.f.copy(), 'g') grad.randomize() grad.checkgrad(verbose=1) self.assertTrue(grad.checkgrad()) - def test_gaussian_dnlog_mass_dvar(self): + @unittest.skip("Not Implemented Yet") + def test_gaussian_dlogpdf_df_dvar(self): + #FIXME: Needs non-identity Link function print "\n{}".format(inspect.stack()[0][3]) self.assertTrue( - dparam_checkgrad(self.gauss._nlog_mass, self.gauss._dnlog_mass_dvar, - [self.var], args=(self.Y, self.f), constrain_positive=True, + dparam_checkgrad(self.gauss.dlogpdf_df, self.gauss.dlogpdf_df_dvar, + [self.var], args=(self.f, self.Y), constrain_positive=True, randomize=False, verbose=True) ) - def test_gaussian_dnlog_mass_dgp_dvar(self): + @unittest.skip("Not Implemented Yet") + def test_gaussian_d2logpdf2_df2_dvar(self): + #FIXME: Needs non-identity Link function print "\n{}".format(inspect.stack()[0][3]) self.assertTrue( - dparam_checkgrad(self.gauss._dnlog_mass_dgp, self.gauss._dnlog_mass_dgp_dvar, - [self.var], args=(self.Y, self.f), constrain_positive=True, + dparam_checkgrad(self.gauss.d2logpdf_df2, self.gauss.d2logpdf_df2_dvar, + [self.var], args=(self.f, self.Y), constrain_positive=True, randomize=False, verbose=True) ) - def test_gaussian_d2nlog_mass_d2gp_dvar(self): + + """ dGauss_dlink's """ + def test_gaussian_dlogpdf_dlink(self): + print "\n{}".format(inspect.stack()[0][3]) + logpdf = functools.partial(self.gauss.logpdf, y=self.Y) + dlogpdf_dlink = functools.partial(self.gauss.dlogpdf_dlink, y=self.Y) + grad = GradientChecker(logpdf, dlogpdf_dlink, self.f.copy(), 'g') + grad.randomize() + grad.checkgrad(verbose=1) + self.assertTrue(grad.checkgrad()) + + def test_gaussian_d2logpdf_dlink2(self): + print "\n{}".format(inspect.stack()[0][3]) + dlogpdf_dlink = functools.partial(self.gauss.dlogpdf_dlink, y=self.Y) + d2logpdf_dlink2 = functools.partial(self.gauss.d2logpdf_dlink2, y=self.Y) + grad = GradientChecker(dlogpdf_dlink, d2logpdf_dlink2, self.f.copy(), 'g') + grad.randomize() + grad.checkgrad(verbose=1) + self.assertTrue(grad.checkgrad()) + + def test_gaussian_d3logpdf_dlink3(self): + print "\n{}".format(inspect.stack()[0][3]) + d2logpdf_dlink2 = functools.partial(self.gauss.d2logpdf_dlink2, y=self.Y) + d3logpdf_dlink3 = functools.partial(self.gauss.d3logpdf_dlink3, y=self.Y) + grad = GradientChecker(d2logpdf_dlink2, d3logpdf_dlink3, self.f.copy(), 'g') + grad.randomize() + grad.checkgrad(verbose=1) + self.assertTrue(grad.checkgrad()) + + def test_gaussian_dlogpdf_dvar(self): print "\n{}".format(inspect.stack()[0][3]) self.assertTrue( - dparam_checkgrad(self.gauss._d2nlog_mass_dgp2, self.gauss._d2nlog_mass_dgp2_dvar, - [self.var], args=(self.Y, self.f), constrain_positive=True, + dparam_checkgrad(self.gauss.logpdf, self.gauss.dlogpdf_dvar, + [self.var], args=(self.f, self.Y), constrain_positive=True, randomize=False, verbose=True) ) + def test_gaussian_dlogpdf_dlink_dvar(self): + print "\n{}".format(inspect.stack()[0][3]) + self.assertTrue( + dparam_checkgrad(self.gauss.dlogpdf_dlink, self.gauss.dlogpdf_dlink_dvar, + [self.var], args=(self.f, self.Y), constrain_positive=True, + randomize=False, verbose=True) + ) + + def test_gaussian_d2logpdf2_dlink2_dvar(self): + print "\n{}".format(inspect.stack()[0][3]) + self.assertTrue( + dparam_checkgrad(self.gauss.d2logpdf_dlink2, self.gauss.d2logpdf_dlink2_dvar, + [self.var], args=(self.f, self.Y), constrain_positive=True, + randomize=False, verbose=True) + ) + + """ Gradchecker fault """ @unittest.expectedFailure - def test_gaussian_d2lik_d2f_2(self): + def test_gaussian_d2logpdf_df2_2(self): print "\n{}".format(inspect.stack()[0][3]) self.Y = None self.gauss = None @@ -187,99 +220,121 @@ class LaplaceTests(unittest.TestCase): self.f = np.random.rand(self.N, 1) self.gauss = GPy.likelihoods.gaussian(variance=self.var, D=self.D, N=self.N) - dlik_df = functools.partial(self.gauss._dnlog_mass_dgp, obs=self.Y) - d2lik_d2f = functools.partial(self.gauss._d2nlog_mass_dgp2, obs=self.Y) - grad = GradientChecker(dlik_df, d2lik_d2f, self.f.copy(), 'f') - grad.randomize() - grad.checkgrad(verbose=1) - grad.checkgrad() - - self.assertTrue(grad.checkgrad()) - - def test_gaussian_d3lik_d3f(self): - print "\n{}".format(inspect.stack()[0][3]) - d2lik_d2f = functools.partial(self.gauss.d2lik_d2f, self.Y) - d3lik_d3f = functools.partial(self.gauss.d3lik_d3f, self.Y) - grad = GradientChecker(d2lik_d2f, d3lik_d3f, self.f.copy(), 'f') + dlogpdf_df = functools.partial(self.gauss.dlogpdf_df, y=self.Y) + d2logpdf_df2 = functools.partial(self.gauss.d2logpdf_df2, y=self.Y) + grad = GradientChecker(dlogpdf_df, d2logpdf_df2, self.f.copy(), 'g') grad.randomize() grad.checkgrad(verbose=1) self.assertTrue(grad.checkgrad()) - def test_gaussian_dlik_dvar(self): + """ dStudentT_df's """ + @unittest.skip("Not Implemented Yet") + def test_studentt_dlogpdf_df(self): + #FIXME: Needs non-identity Link function print "\n{}".format(inspect.stack()[0][3]) - self.assertTrue( - dparam_checkgrad(self.gauss.lik_function, self.gauss.dlik_dvar, - [self.var], args=(self.Y, self.f), constrain_positive=True, - randomize=False, verbose=True) - ) - - def test_gaussian_dlik_df_dvar(self): - print "\n{}".format(inspect.stack()[0][3]) - self.assertTrue( - dparam_checkgrad(self.gauss.dlik_df, self.gauss.dlik_df_dvar, - [self.var], args=(self.Y.copy(), self.f.copy()), constrain_positive=True, - randomize=False, verbose=True) - ) - - def test_gaussian_d2lik_d2f_dvar(self): - print "\n{}".format(inspect.stack()[0][3]) - self.assertTrue( - dparam_checkgrad(self.gauss.d2lik_d2f, self.gauss.d2lik_d2f_dvar, - [self.var], args=(self.Y, self.f.copy()), constrain_positive=True, - randomize=True, verbose=True) - ) - - def test_studentt_dlik_df(self): - print "\n{}".format(inspect.stack()[0][3]) - link = functools.partial(self.stu_t.lik_function, self.Y) - dlik_df = functools.partial(self.stu_t.dlik_df, self.Y) - grad = GradientChecker(link, dlik_df, self.f.copy(), 'f') + link = functools.partial(self.stu_t.logpdf, y=self.Y) + dlogpdf_df = functools.partial(self.stu_t.dlogpdf_df, y=self.Y) + grad = GradientChecker(link, dlogpdf_df, self.f.copy(), 'f') grad.randomize() grad.checkgrad(verbose=1) self.assertTrue(grad.checkgrad()) - def test_studentt_d2lik_d2f(self): + @unittest.skip("Not Implemented Yet") + def test_studentt_d2logpdf_df2(self): + #FIXME: Needs non-identity Link function print "\n{}".format(inspect.stack()[0][3]) - dlik_df = functools.partial(self.stu_t.dlik_df, self.Y) - d2lik_d2f = functools.partial(self.stu_t.d2lik_d2f, self.Y) - grad = GradientChecker(dlik_df, d2lik_d2f, self.f.copy(), 'f') + dlogpdf_df = functools.partial(self.stu_t.dlogpdf_df, y=self.Y) + d2logpdf_df2 = functools.partial(self.stu_t.d2logpdf_df2, y=self.Y) + grad = GradientChecker(dlogpdf_df, d2logpdf_df2, self.f.copy(), 'f') grad.randomize() grad.checkgrad(verbose=1) self.assertTrue(grad.checkgrad()) + @unittest.skip("Not Implemented Yet") def test_studentt_d3lik_d3f(self): + #FIXME: Needs non-identity Link function print "\n{}".format(inspect.stack()[0][3]) - d2lik_d2f = functools.partial(self.stu_t.d2lik_d2f, self.Y) - d3lik_d3f = functools.partial(self.stu_t.d3lik_d3f, self.Y) - grad = GradientChecker(d2lik_d2f, d3lik_d3f, self.f.copy(), 'f') + d2logpdf_df2 = functools.partial(self.stu_t.d2logpdf_d2f, y=self.Y) + d3logpdf_df3 = functools.partial(self.stu_t.d3logpdf_d3f, y=self.Y) + grad = GradientChecker(d2logpdf_df2, d3logpdf_df3, self.f.copy(), 'f') grad.randomize() grad.checkgrad(verbose=1) self.assertTrue(grad.checkgrad()) - def test_studentt_dlik_dvar(self): + @unittest.skip("Not Implemented Yet") + def test_studentt_dlogpdf_df_dvar(self): + #FIXME: Needs non-identity Link function print "\n{}".format(inspect.stack()[0][3]) self.assertTrue( - dparam_checkgrad(self.stu_t.lik_function, self.stu_t.dlik_dvar, + dparam_checkgrad(self.stu_t.dlogpdf_df, self.stu_t.dlogpdf_df_dvar, [self.var], args=(self.Y.copy(), self.f.copy()), constrain_positive=True, randomize=True, verbose=True) ) - def test_studentt_dlik_df_dvar(self): + @unittest.skip("Not Implemented Yet") + def test_studentt_d2logpdf_df2_dvar(self): + #FIXME: Needs non-identity Link function print "\n{}".format(inspect.stack()[0][3]) self.assertTrue( - dparam_checkgrad(self.stu_t.dlik_df, self.stu_t.dlik_df_dvar, + dparam_checkgrad(self.stu_t.d2logpdf_df2, self.stu_t.d2logpdf_df2_dvar, [self.var], args=(self.Y.copy(), self.f.copy()), constrain_positive=True, randomize=True, verbose=True) ) - def test_studentt_d2lik_d2f_dvar(self): + """ dStudentT_dlink's """ + def test_studentt_dlogpdf_dlink(self): + print "\n{}".format(inspect.stack()[0][3]) + logpdf = functools.partial(self.stu_t.logpdf, y=self.Y) + dlogpdf_dlink = functools.partial(self.stu_t.dlogpdf_dlink, y=self.Y) + grad = GradientChecker(logpdf, dlogpdf_dlink, self.f.copy(), 'f') + grad.randomize() + grad.checkgrad(verbose=1) + self.assertTrue(grad.checkgrad()) + + def test_studentt_d2logpdf_dlink2(self): + print "\n{}".format(inspect.stack()[0][3]) + dlogpdf_dlink = functools.partial(self.stu_t.dlogpdf_dlink, y=self.Y) + d2logpdf_dlink2 = functools.partial(self.stu_t.d2logpdf_dlink2, y=self.Y) + grad = GradientChecker(dlogpdf_dlink, d2logpdf_dlink2, self.f.copy(), 'f') + grad.randomize() + grad.checkgrad(verbose=1) + self.assertTrue(grad.checkgrad()) + + def test_studentt_d3logpdf_dlink3(self): + print "\n{}".format(inspect.stack()[0][3]) + d2logpdf_dlink2 = functools.partial(self.stu_t.d2logpdf_dlink2, y=self.Y) + d3logpdf_dlink3 = functools.partial(self.stu_t.d3logpdf_dlink3, y=self.Y) + grad = GradientChecker(d2logpdf_dlink2, d3logpdf_dlink3, self.f.copy(), 'f') + grad.randomize() + grad.checkgrad(verbose=1) + self.assertTrue(grad.checkgrad()) + + def test_studentt_dlogpdf_dvar(self): print "\n{}".format(inspect.stack()[0][3]) self.assertTrue( - dparam_checkgrad(self.stu_t.d2lik_d2f, self.stu_t.d2lik_d2f_dvar, + dparam_checkgrad(self.stu_t.logpdf, self.stu_t.dlogpdf_dvar, [self.var], args=(self.Y.copy(), self.f.copy()), constrain_positive=True, randomize=True, verbose=True) ) + def test_studentt_dlogpdf_dlink_dvar(self): + print "\n{}".format(inspect.stack()[0][3]) + self.assertTrue( + dparam_checkgrad(self.stu_t.dlogpdf_dlink, self.stu_t.dlogpdf_dlink_dvar, + [self.var], args=(self.Y.copy(), self.f.copy()), + constrain_positive=True, randomize=True, verbose=True) + ) + + def test_studentt_d2logpdf_dlink2_dvar(self): + print "\n{}".format(inspect.stack()[0][3]) + self.assertTrue( + dparam_checkgrad(self.stu_t.d2logpdf_dlink2, self.stu_t.d2logpdf_dlink2_dvar, + [self.var], args=(self.Y.copy(), self.f.copy()), + constrain_positive=True, randomize=True, verbose=True) + ) + + + """ Grad check whole models (grad checking Laplace not just noise models """ def test_gauss_rbf(self): print "\n{}".format(inspect.stack()[0][3]) self.Y = self.Y/self.Y.max()