Updated other likelihoods to give back logpdf and gradients for each link_f rather than summing on the inside

This commit is contained in:
Alan Saul 2015-03-09 10:27:21 +00:00
parent 48821a6b73
commit 233c5ee8b4
7 changed files with 22 additions and 42 deletions

View file

@ -57,9 +57,8 @@ class Exponential(Likelihood):
:rtype: float :rtype: float
""" """
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
log_objective = np.log(link_f) - y*link_f log_objective = np.log(link_f) - y*link_f
return np.sum(log_objective) return log_objective
def dlogpdf_dlink(self, link_f, y, Y_metadata=None): def dlogpdf_dlink(self, link_f, y, Y_metadata=None):
""" """
@ -77,7 +76,6 @@ class Exponential(Likelihood):
:rtype: Nx1 array :rtype: Nx1 array
""" """
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
grad = 1./link_f - y grad = 1./link_f - y
#grad = y/(link_f**2) - 1./link_f #grad = y/(link_f**2) - 1./link_f
return grad return grad
@ -103,7 +101,6 @@ class Exponential(Likelihood):
Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases
(the distribution for y_i depends only on link(f_i) not on link(f_(j!=i)) (the distribution for y_i depends only on link(f_i) not on link(f_(j!=i))
""" """
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
hess = -1./(link_f**2) hess = -1./(link_f**2)
#hess = -2*y/(link_f**3) + 1/(link_f**2) #hess = -2*y/(link_f**3) + 1/(link_f**2)
return hess return hess
@ -123,7 +120,6 @@ class Exponential(Likelihood):
:returns: third derivative of likelihood evaluated at points f :returns: third derivative of likelihood evaluated at points f
:rtype: Nx1 array :rtype: Nx1 array
""" """
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
d3lik_dlink3 = 2./(link_f**3) d3lik_dlink3 = 2./(link_f**3)
#d3lik_dlink3 = 6*y/(link_f**4) - 2./(link_f**3) #d3lik_dlink3 = 6*y/(link_f**4) - 2./(link_f**3)
return d3lik_dlink3 return d3lik_dlink3

View file

@ -66,12 +66,11 @@ class Gamma(Likelihood):
:rtype: float :rtype: float
""" """
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
#alpha = self.gp_link.transf(gp)*self.beta #alpha = self.gp_link.transf(gp)*self.beta
#return (1. - alpha)*np.log(obs) + self.beta*obs - alpha * np.log(self.beta) + np.log(special.gamma(alpha)) #return (1. - alpha)*np.log(obs) + self.beta*obs - alpha * np.log(self.beta) + np.log(special.gamma(alpha))
alpha = link_f*self.beta alpha = link_f*self.beta
log_objective = alpha*np.log(self.beta) - np.log(special.gamma(alpha)) + (alpha - 1)*np.log(y) - self.beta*y log_objective = alpha*np.log(self.beta) - np.log(special.gamma(alpha)) + (alpha - 1)*np.log(y) - self.beta*y
return np.sum(log_objective) return log_objective
def dlogpdf_dlink(self, link_f, y, Y_metadata=None): def dlogpdf_dlink(self, link_f, y, Y_metadata=None):
""" """
@ -90,7 +89,6 @@ class Gamma(Likelihood):
:rtype: Nx1 array :rtype: Nx1 array
""" """
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
grad = self.beta*np.log(self.beta*y) - special.psi(self.beta*link_f)*self.beta grad = self.beta*np.log(self.beta*y) - special.psi(self.beta*link_f)*self.beta
#old #old
#return -self.gp_link.dtransf_df(gp)*self.beta*np.log(obs) + special.psi(self.gp_link.transf(gp)*self.beta) * self.gp_link.dtransf_df(gp)*self.beta #return -self.gp_link.dtransf_df(gp)*self.beta*np.log(obs) + special.psi(self.gp_link.transf(gp)*self.beta) * self.gp_link.dtransf_df(gp)*self.beta
@ -118,7 +116,6 @@ class Gamma(Likelihood):
Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases
(the distribution for y_i depends only on link(f_i) not on link(f_(j!=i)) (the distribution for y_i depends only on link(f_i) not on link(f_(j!=i))
""" """
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
hess = -special.polygamma(1, self.beta*link_f)*(self.beta**2) hess = -special.polygamma(1, self.beta*link_f)*(self.beta**2)
#old #old
#return -self.gp_link.d2transf_df2(gp)*self.beta*np.log(obs) + special.polygamma(1,self.gp_link.transf(gp)*self.beta)*(self.gp_link.dtransf_df(gp)*self.beta)**2 + special.psi(self.gp_link.transf(gp)*self.beta)*self.gp_link.d2transf_df2(gp)*self.beta #return -self.gp_link.d2transf_df2(gp)*self.beta*np.log(obs) + special.polygamma(1,self.gp_link.transf(gp)*self.beta)*(self.gp_link.dtransf_df(gp)*self.beta)**2 + special.psi(self.gp_link.transf(gp)*self.beta)*self.gp_link.d2transf_df2(gp)*self.beta
@ -140,6 +137,5 @@ class Gamma(Likelihood):
:returns: third derivative of likelihood evaluated at points f :returns: third derivative of likelihood evaluated at points f
:rtype: Nx1 array :rtype: Nx1 array
""" """
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
d3lik_dlink3 = -special.polygamma(2, self.beta*link_f)*(self.beta**3) d3lik_dlink3 = -special.polygamma(2, self.beta*link_f)*(self.beta**3)
return d3lik_dlink3 return d3lik_dlink3

View file

@ -130,11 +130,10 @@ class Gaussian(Likelihood):
:returns: log likelihood evaluated for this point :returns: log likelihood evaluated for this point
:rtype: float :rtype: float
""" """
assert np.asarray(link_f).shape == np.asarray(y).shape
N = y.shape[0] N = y.shape[0]
ln_det_cov = N*np.log(self.variance) ln_det_cov = np.log(self.variance)
return -0.5*(np.sum((y-link_f)**2/self.variance) + ln_det_cov + N*np.log(2.*np.pi)) return -0.5*((y-link_f)**2/self.variance + ln_det_cov + np.log(2.*np.pi))
def dlogpdf_dlink(self, link_f, y, Y_metadata=None): def dlogpdf_dlink(self, link_f, y, Y_metadata=None):
""" """
@ -151,8 +150,7 @@ class Gaussian(Likelihood):
:returns: gradient of log likelihood evaluated at points link(f) :returns: gradient of log likelihood evaluated at points link(f)
:rtype: Nx1 array :rtype: Nx1 array
""" """
assert np.asarray(link_f).shape == np.asarray(y).shape s2_i = 1.0/self.variance
s2_i = (1.0/self.variance)
grad = s2_i*y - s2_i*link_f grad = s2_i*y - s2_i*link_f
return grad return grad
@ -178,9 +176,9 @@ class Gaussian(Likelihood):
Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases
(the distribution for y_i depends only on link(f_i) not on link(f_(j!=i)) (the distribution for y_i depends only on link(f_i) not on link(f_(j!=i))
""" """
assert np.asarray(link_f).shape == np.asarray(y).shape
N = y.shape[0] N = y.shape[0]
hess = -(1.0/self.variance)*np.ones((N, 1)) D = link_f.shape[1]
hess = -(1.0/self.variance)*np.ones((N, D))
return hess return hess
def d3logpdf_dlink3(self, link_f, y, Y_metadata=None): def d3logpdf_dlink3(self, link_f, y, Y_metadata=None):
@ -198,9 +196,9 @@ class Gaussian(Likelihood):
:returns: third derivative of log likelihood evaluated at points link(f) :returns: third derivative of log likelihood evaluated at points link(f)
:rtype: Nx1 array :rtype: Nx1 array
""" """
assert np.asarray(link_f).shape == np.asarray(y).shape
N = y.shape[0] N = y.shape[0]
d3logpdf_dlink3 = np.zeros((N,1)) D = link_f.shape[1]
d3logpdf_dlink3 = np.zeros((N,D))
return d3logpdf_dlink3 return d3logpdf_dlink3
def dlogpdf_link_dvar(self, link_f, y, Y_metadata=None): def dlogpdf_link_dvar(self, link_f, y, Y_metadata=None):
@ -218,12 +216,11 @@ class Gaussian(Likelihood):
:returns: derivative of log likelihood evaluated at points link(f) w.r.t variance parameter :returns: derivative of log likelihood evaluated at points link(f) w.r.t variance parameter
:rtype: float :rtype: float
""" """
assert np.asarray(link_f).shape == np.asarray(y).shape
e = y - link_f e = y - link_f
s_4 = 1.0/(self.variance**2) s_4 = 1.0/(self.variance**2)
N = y.shape[0] N = y.shape[0]
dlik_dsigma = -0.5*N/self.variance + 0.5*s_4*np.sum(np.square(e)) dlik_dsigma = -0.5/self.variance + 0.5*s_4*np.square(e)
return np.sum(dlik_dsigma) # Sure about this sum? return dlik_dsigma
def dlogpdf_dlink_dvar(self, link_f, y, Y_metadata=None): def dlogpdf_dlink_dvar(self, link_f, y, Y_metadata=None):
""" """
@ -240,7 +237,6 @@ class Gaussian(Likelihood):
:returns: derivative of log likelihood evaluated at points link(f) w.r.t variance parameter :returns: derivative of log likelihood evaluated at points link(f) w.r.t variance parameter
:rtype: Nx1 array :rtype: Nx1 array
""" """
assert np.asarray(link_f).shape == np.asarray(y).shape
s_4 = 1.0/(self.variance**2) s_4 = 1.0/(self.variance**2)
dlik_grad_dsigma = -s_4*y + s_4*link_f dlik_grad_dsigma = -s_4*y + s_4*link_f
return dlik_grad_dsigma return dlik_grad_dsigma
@ -260,15 +256,15 @@ class Gaussian(Likelihood):
:returns: derivative of log hessian evaluated at points link(f_i) and link(f_j) w.r.t variance parameter :returns: derivative of log hessian evaluated at points link(f_i) and link(f_j) w.r.t variance parameter
:rtype: Nx1 array :rtype: Nx1 array
""" """
assert np.asarray(link_f).shape == np.asarray(y).shape
s_4 = 1.0/(self.variance**2) s_4 = 1.0/(self.variance**2)
N = y.shape[0] N = y.shape[0]
d2logpdf_dlink2_dvar = np.ones((N,1))*s_4 D = link_f.shape[1]
d2logpdf_dlink2_dvar = np.ones((N, D))*s_4
return d2logpdf_dlink2_dvar return d2logpdf_dlink2_dvar
def dlogpdf_link_dtheta(self, f, y, Y_metadata=None): def dlogpdf_link_dtheta(self, f, y, Y_metadata=None):
dlogpdf_dvar = self.dlogpdf_link_dvar(f, y, Y_metadata=Y_metadata) dlogpdf_dvar = self.dlogpdf_link_dvar(f, y, Y_metadata=Y_metadata)
return np.asarray([[dlogpdf_dvar]]) return dlogpdf_dvar
def dlogpdf_dlink_dtheta(self, f, y, Y_metadata=None): def dlogpdf_dlink_dtheta(self, f, y, Y_metadata=None):
dlogpdf_dlink_dvar = self.dlogpdf_dlink_dvar(f, y, Y_metadata=Y_metadata) dlogpdf_dlink_dvar = self.dlogpdf_dlink_dvar(f, y, Y_metadata=Y_metadata)

View file

@ -425,7 +425,7 @@ class Likelihood(Parameterized):
return np.zeros([f.shape[0], 0]) return np.zeros([f.shape[0], 0])
def _laplace_gradients(self, f, y, Y_metadata=None): def _laplace_gradients(self, f, y, Y_metadata=None):
dlogpdf_dtheta = self.dlogpdf_dtheta(f, y, Y_metadata=Y_metadata) dlogpdf_dtheta = self.dlogpdf_dtheta(f, y, Y_metadata=Y_metadata).sum(axis=0)
dlogpdf_df_dtheta = self.dlogpdf_df_dtheta(f, y, Y_metadata=Y_metadata) dlogpdf_df_dtheta = self.dlogpdf_df_dtheta(f, y, Y_metadata=Y_metadata)
d2logpdf_df2_dtheta = self.d2logpdf_df2_dtheta(f, y, Y_metadata=Y_metadata) d2logpdf_df2_dtheta = self.d2logpdf_df2_dtheta(f, y, Y_metadata=Y_metadata)

View file

@ -105,7 +105,7 @@ class Poisson(Likelihood):
Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases
(the distribution for y_i depends only on link(f_i) not on link(f_(j!=i)) (the distribution for y_i depends only on link(f_i) not on link(f_(j!=i))
""" """
return -y/(link_f**2) return -y/(link_f**2)
def d3logpdf_dlink3(self, link_f, y, Y_metadata=None): def d3logpdf_dlink3(self, link_f, y, Y_metadata=None):
""" """
@ -122,7 +122,6 @@ class Poisson(Likelihood):
:returns: third derivative of likelihood evaluated at points f :returns: third derivative of likelihood evaluated at points f
:rtype: Nx1 array :rtype: Nx1 array
""" """
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
d3lik_dlink3 = 2*y/(link_f)**3 d3lik_dlink3 = 2*y/(link_f)**3
return d3lik_dlink3 return d3lik_dlink3

View file

@ -86,7 +86,6 @@ class StudentT(Likelihood):
:rtype: float :rtype: float
""" """
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
e = y - inv_link_f e = y - inv_link_f
#FIXME: #FIXME:
#Why does np.log(1 + (1/self.v)*((y-inv_link_f)**2)/self.sigma2) suppress the divide by zero?! #Why does np.log(1 + (1/self.v)*((y-inv_link_f)**2)/self.sigma2) suppress the divide by zero?!
@ -97,7 +96,7 @@ class StudentT(Likelihood):
- 0.5*np.log(self.sigma2 * self.v * np.pi) - 0.5*np.log(self.sigma2 * self.v * np.pi)
- 0.5*(self.v + 1)*np.log(1 + (1/np.float(self.v))*((e**2)/self.sigma2)) - 0.5*(self.v + 1)*np.log(1 + (1/np.float(self.v))*((e**2)/self.sigma2))
) )
return np.sum(objective) return objective
def dlogpdf_dlink(self, inv_link_f, y, Y_metadata=None): def dlogpdf_dlink(self, inv_link_f, y, Y_metadata=None):
""" """
@ -115,7 +114,6 @@ class StudentT(Likelihood):
:rtype: Nx1 array :rtype: Nx1 array
""" """
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
e = y - inv_link_f e = y - inv_link_f
grad = ((self.v + 1) * e) / (self.v * self.sigma2 + (e**2)) grad = ((self.v + 1) * e) / (self.v * self.sigma2 + (e**2))
return grad return grad
@ -141,7 +139,6 @@ class StudentT(Likelihood):
Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases
(the distribution for y_i depends only on link(f_i) not on link(f_(j!=i)) (the distribution for y_i depends only on link(f_i) not on link(f_(j!=i))
""" """
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
e = y - inv_link_f e = y - inv_link_f
hess = ((self.v + 1)*(e**2 - self.v*self.sigma2)) / ((self.sigma2*self.v + e**2)**2) hess = ((self.v + 1)*(e**2 - self.v*self.sigma2)) / ((self.sigma2*self.v + e**2)**2)
return hess return hess
@ -161,7 +158,6 @@ class StudentT(Likelihood):
:returns: third derivative of likelihood evaluated at points f :returns: third derivative of likelihood evaluated at points f
:rtype: Nx1 array :rtype: Nx1 array
""" """
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
e = y - inv_link_f e = y - inv_link_f
d3lik_dlink3 = ( -(2*(self.v + 1)*(-e)*(e**2 - 3*self.v*self.sigma2)) / d3lik_dlink3 = ( -(2*(self.v + 1)*(-e)*(e**2 - 3*self.v*self.sigma2)) /
((e**2 + self.sigma2*self.v)**3) ((e**2 + self.sigma2*self.v)**3)
@ -183,10 +179,9 @@ class StudentT(Likelihood):
:returns: derivative of likelihood evaluated at points f w.r.t variance parameter :returns: derivative of likelihood evaluated at points f w.r.t variance parameter
:rtype: float :rtype: float
""" """
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
e = y - inv_link_f e = y - inv_link_f
dlogpdf_dvar = self.v*(e**2 - self.sigma2)/(2*self.sigma2*(self.sigma2*self.v + e**2)) dlogpdf_dvar = self.v*(e**2 - self.sigma2)/(2*self.sigma2*(self.sigma2*self.v + e**2))
return np.sum(dlogpdf_dvar) return dlogpdf_dvar
def dlogpdf_dlink_dvar(self, inv_link_f, y, Y_metadata=None): def dlogpdf_dlink_dvar(self, inv_link_f, y, Y_metadata=None):
""" """
@ -203,7 +198,6 @@ class StudentT(Likelihood):
:returns: derivative of likelihood evaluated at points f w.r.t variance parameter :returns: derivative of likelihood evaluated at points f w.r.t variance parameter
:rtype: Nx1 array :rtype: Nx1 array
""" """
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
e = y - inv_link_f e = y - inv_link_f
dlogpdf_dlink_dvar = (self.v*(self.v+1)*(-e))/((self.sigma2*self.v + e**2)**2) dlogpdf_dlink_dvar = (self.v*(self.v+1)*(-e))/((self.sigma2*self.v + e**2)**2)
return dlogpdf_dlink_dvar return dlogpdf_dlink_dvar
@ -223,7 +217,6 @@ class StudentT(Likelihood):
:returns: derivative of hessian evaluated at points f and f_j w.r.t variance parameter :returns: derivative of hessian evaluated at points f and f_j w.r.t variance parameter
:rtype: Nx1 array :rtype: Nx1 array
""" """
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
e = y - inv_link_f e = y - inv_link_f
d2logpdf_dlink2_dvar = ( (self.v*(self.v+1)*(self.sigma2*self.v - 3*(e**2))) d2logpdf_dlink2_dvar = ( (self.v*(self.v+1)*(self.sigma2*self.v - 3*(e**2)))
/ ((self.sigma2*self.v + (e**2))**3) / ((self.sigma2*self.v + (e**2))**3)
@ -246,7 +239,7 @@ class StudentT(Likelihood):
return np.hstack((d2logpdf_dlink2_dvar, d2logpdf_dlink2_dv)) return np.hstack((d2logpdf_dlink2_dvar, d2logpdf_dlink2_dv))
def predictive_mean(self, mu, sigma, Y_metadata=None): def predictive_mean(self, mu, sigma, Y_metadata=None):
# The comment here confuses mean and median. # The comment here confuses mean and median.
return self.gp_link.transf(mu) # only true if link is monotonic, which it is. return self.gp_link.transf(mu) # only true if link is monotonic, which it is.
def predictive_variance(self, mu,variance, predictive_mean=None, Y_metadata=None): def predictive_variance(self, mu,variance, predictive_mean=None, Y_metadata=None):

View file

@ -362,7 +362,7 @@ class TestNoiseModels(object):
def t_dlogpdf_df(self, model, Y, f): def t_dlogpdf_df(self, model, Y, f):
print "\n{}".format(inspect.stack()[0][3]) print "\n{}".format(inspect.stack()[0][3])
self.description = "\n{}".format(inspect.stack()[0][3]) self.description = "\n{}".format(inspect.stack()[0][3])
logpdf = functools.partial(model.logpdf, y=Y) logpdf = functools.partial(np.sum(model.logpdf), y=Y)
dlogpdf_df = functools.partial(model.dlogpdf_df, y=Y) dlogpdf_df = functools.partial(model.dlogpdf_df, y=Y)
grad = GradientChecker(logpdf, dlogpdf_df, f.copy(), 'g') grad = GradientChecker(logpdf, dlogpdf_df, f.copy(), 'g')
grad.randomize() grad.randomize()
@ -652,9 +652,9 @@ class LaplaceTests(unittest.TestCase):
print m2 print m2
optimizer = 'scg' optimizer = 'scg'
print "Gaussian" print "Gaussian"
m1.optimize(optimizer, messages=debug) m1.optimize(optimizer, messages=debug, ipython_notebook=False)
print "Laplace Gaussian" print "Laplace Gaussian"
m2.optimize(optimizer, messages=debug) m2.optimize(optimizer, messages=debug, ipython_notebook=False)
if debug: if debug:
print m1 print m1
print m2 print m2