Reimplemented gradients for exponential, seems to work for

laplace now, needs a visual test though
This commit is contained in:
Alan Saul 2013-10-25 12:21:11 +01:00
parent a46121c430
commit 8ef3625832
5 changed files with 134 additions and 32 deletions

View file

@ -37,7 +37,7 @@ def exponential(gp_link=None):
:param gp_link: a GPy gp_link function :param gp_link: a GPy gp_link function
""" """
if gp_link is None: if gp_link is None:
gp_link = noise_models.gp_transformations.Identity() gp_link = noise_models.gp_transformations.Log_ex_1()
analytical_mean = False analytical_mean = False
analytical_variance = False analytical_variance = False

View file

@ -24,24 +24,112 @@ class Exponential(NoiseDistribution):
def _preprocess_values(self,Y): def _preprocess_values(self,Y):
return Y return Y
def _mass(self,gp,obs): def pdf_link(self, link_f, y, extra_data=None):
""" """
Mass (or density) function Likelihood function given link(f)
"""
return np.exp(-obs/self.gp_link.transf(gp))/self.gp_link.transf(gp)
def _nlog_mass(self,gp,obs): .. math::
""" p(y_{i}|\\lambda(f_{i})) = \\lambda(f_{i})\\exp (-y\\lambda(f_{i}))
Negative logarithm of the un-normalized distribution: factors that are not a function of gp are omitted
"""
return obs/self.gp_link.transf(gp) + np.log(self.gp_link.transf(gp))
def _dnlog_mass_dgp(self,gp,obs): :param link_f: latent variables link(f)
return ( 1./self.gp_link.transf(gp) - obs/self.gp_link.transf(gp)**2) * self.gp_link.dtransf_df(gp) :type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in exponential distribution
:returns: likelihood evaluated for this point
:rtype: float
"""
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
return np.exp(np.sum(np.log(link_f*np.exp(-y*link_f))))
#return np.exp(np.sum(-y/link_f - np.log(link_f) ))
def _d2nlog_mass_dgp2(self,gp,obs): def logpdf_link(self, link_f, y, extra_data=None):
fgp = self.gp_link.transf(gp) """
return (2*obs/fgp**3 - 1./fgp**2) * self.gp_link.dtransf_df(gp)**2 + ( 1./fgp - obs/fgp**2) * self.gp_link.d2transf_df2(gp) Log Likelihood Function given link(f)
.. math::
\\ln p(y_{i}|\lambda(f_{i})) = \\ln \\lambda(f_{i}) - y_{i}\\lambda(f_{i})
:param link_f: latent variables (link(f))
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in exponential distribution
:returns: likelihood evaluated for this point
:rtype: float
"""
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
logpdf_link = np.sum(np.log(link_f) - y*link_f)
#logpdf_link = np.sum(-np.log(link_f) - y/link_f)
return logpdf_link
def dlogpdf_dlink(self, link_f, y, extra_data=None):
"""
Gradient of the log likelihood function at y, given link(f) w.r.t link(f)
.. math::
\\frac{d \\ln p(y_{i}|\lambda(f_{i}))}{d\\lambda(f)} = \\frac{1}{\\lambda(f)} - y_{i}
:param link_f: latent variables (f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in exponential distribution
:returns: gradient of likelihood evaluated at points
:rtype: Nx1 array
"""
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
grad = 1./link_f - y
#grad = y/(link_f**2) - 1./link_f
return grad
def d2logpdf_dlink2(self, link_f, y, extra_data=None):
"""
Hessian at y, given link(f), w.r.t link(f)
i.e. second derivative logpdf at y given link(f_i) and link(f_j) w.r.t link(f_i) and link(f_j)
The hessian will be 0 unless i == j
.. math::
\\frac{d^{2} \\ln p(y_{i}|\lambda(f_{i}))}{d^{2}\\lambda(f)} = -\\frac{1}{\\lambda(f_{i})^{2}}
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in exponential distribution
:returns: Diagonal of hessian matrix (second derivative of likelihood evaluated at points f)
:rtype: Nx1 array
.. Note::
Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases
(the distribution for y_i depends only on link(f_i) not on link(f_(j!=i))
"""
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
hess = -1./(link_f**2)
#hess = -2*y/(link_f**3) + 1/(link_f**2)
return hess
def d3logpdf_dlink3(self, link_f, y, extra_data=None):
"""
Third order derivative log-likelihood function at y given link(f) w.r.t link(f)
.. math::
\\frac{d^{3} \\ln p(y_{i}|\lambda(f_{i}))}{d^{3}\\lambda(f)} = \\frac{2}{\\lambda(f_{i})^{3}}
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in exponential distribution
:returns: third derivative of likelihood evaluated at points f
:rtype: Nx1 array
"""
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
d3lik_dlink3 = 2./(link_f**3)
#d3lik_dlink3 = 6*y/(link_f**4) - 2./(link_f**3)
return d3lik_dlink3
def _mean(self,gp): def _mean(self,gp):
""" """

View file

@ -222,21 +222,12 @@ class NoiseDistribution(object):
raise NotImplementedError raise NotImplementedError
def dlogpdf_link_dtheta(self, link_f, y, extra_data=None): def dlogpdf_link_dtheta(self, link_f, y, extra_data=None):
"""
Need to check if it should even exist by checking length of getparams
"""
raise NotImplementedError raise NotImplementedError
def dlogpdf_dlink_dtheta(self, link_f, y, extra_data=None): def dlogpdf_dlink_dtheta(self, link_f, y, extra_data=None):
"""
Need to check if it should even exist by checking length of getparams
"""
raise NotImplementedError raise NotImplementedError
def d2logpdf_dlink2_dtheta(self, link_f, y, extra_data=None): def d2logpdf_dlink2_dtheta(self, link_f, y, extra_data=None):
"""
Need to check if it should even exist by checking length of getparams
"""
raise NotImplementedError raise NotImplementedError
def pdf(self, f, y, extra_data=None): def pdf(self, f, y, extra_data=None):

View file

@ -55,7 +55,7 @@ class StudentT(NoiseDistribution):
:returns: likelihood evaluated for this point :returns: likelihood evaluated for this point
:rtype: float :rtype: float
""" """
assert np.asarray(link_f).shape == np.asarray(y).shape assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
e = y - link_f e = y - link_f
#Careful gamma(big_number) is infinity! #Careful gamma(big_number) is infinity!
objective = ((np.exp(gammaln((self.v + 1)*0.5) - gammaln(self.v * 0.5)) objective = ((np.exp(gammaln((self.v + 1)*0.5) - gammaln(self.v * 0.5))
@ -80,7 +80,7 @@ class StudentT(NoiseDistribution):
:rtype: float :rtype: float
""" """
assert np.asarray(link_f).shape == np.asarray(y).shape assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
e = y - link_f e = y - link_f
objective = (+ gammaln((self.v + 1) * 0.5) objective = (+ gammaln((self.v + 1) * 0.5)
- gammaln(self.v * 0.5) - gammaln(self.v * 0.5)
@ -105,7 +105,7 @@ class StudentT(NoiseDistribution):
:rtype: Nx1 array :rtype: Nx1 array
""" """
assert y.shape == link_f.shape assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
e = y - link_f e = y - link_f
grad = ((self.v + 1) * e) / (self.v * self.sigma2 + (e**2)) grad = ((self.v + 1) * e) / (self.v * self.sigma2 + (e**2))
return grad return grad
@ -131,7 +131,7 @@ class StudentT(NoiseDistribution):
Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases
(the distribution for y_i depends only on link(f_i) not on link(f_(j!=i)) (the distribution for y_i depends only on link(f_i) not on link(f_(j!=i))
""" """
assert y.shape == link_f.shape assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
e = y - link_f e = y - link_f
hess = ((self.v + 1)*(e**2 - self.v*self.sigma2)) / ((self.sigma2*self.v + e**2)**2) hess = ((self.v + 1)*(e**2 - self.v*self.sigma2)) / ((self.sigma2*self.v + e**2)**2)
return hess return hess
@ -151,7 +151,7 @@ class StudentT(NoiseDistribution):
:returns: third derivative of likelihood evaluated at points f :returns: third derivative of likelihood evaluated at points f
:rtype: Nx1 array :rtype: Nx1 array
""" """
assert y.shape == link_f.shape assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
e = y - link_f e = y - link_f
d3lik_dlink3 = ( -(2*(self.v + 1)*(-e)*(e**2 - 3*self.v*self.sigma2)) / d3lik_dlink3 = ( -(2*(self.v + 1)*(-e)*(e**2 - 3*self.v*self.sigma2)) /
((e**2 + self.sigma2*self.v)**3) ((e**2 + self.sigma2*self.v)**3)
@ -173,7 +173,7 @@ class StudentT(NoiseDistribution):
:returns: derivative of likelihood evaluated at points f w.r.t variance parameter :returns: derivative of likelihood evaluated at points f w.r.t variance parameter
:rtype: float :rtype: float
""" """
assert y.shape == link_f.shape assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
e = y - link_f e = y - link_f
dlogpdf_dvar = self.v*(e**2 - self.sigma2)/(2*self.sigma2*(self.sigma2*self.v + e**2)) dlogpdf_dvar = self.v*(e**2 - self.sigma2)/(2*self.sigma2*(self.sigma2*self.v + e**2))
return np.sum(dlogpdf_dvar) return np.sum(dlogpdf_dvar)
@ -193,7 +193,7 @@ class StudentT(NoiseDistribution):
:returns: derivative of likelihood evaluated at points f w.r.t variance parameter :returns: derivative of likelihood evaluated at points f w.r.t variance parameter
:rtype: Nx1 array :rtype: Nx1 array
""" """
assert y.shape == link_f.shape assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
e = y - link_f e = y - link_f
dlogpdf_dlink_dvar = (self.v*(self.v+1)*(-e))/((self.sigma2*self.v + e**2)**2) dlogpdf_dlink_dvar = (self.v*(self.v+1)*(-e))/((self.sigma2*self.v + e**2)**2)
return dlogpdf_dlink_dvar return dlogpdf_dlink_dvar
@ -213,7 +213,7 @@ class StudentT(NoiseDistribution):
:returns: derivative of hessian evaluated at points f and f_j w.r.t variance parameter :returns: derivative of hessian evaluated at points f and f_j w.r.t variance parameter
:rtype: Nx1 array :rtype: Nx1 array
""" """
assert y.shape == link_f.shape assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
e = y - link_f e = y - link_f
d2logpdf_dlink2_dvar = ( (self.v*(self.v+1)*(self.sigma2*self.v - 3*(e**2))) d2logpdf_dlink2_dvar = ( (self.v*(self.v+1)*(self.sigma2*self.v - 3*(e**2)))
/ ((self.sigma2*self.v + (e**2))**3) / ((self.sigma2*self.v + (e**2))**3)
@ -314,3 +314,19 @@ class StudentT(NoiseDistribution):
p_025 = mu - p p_025 = mu - p
p_975 = mu + p p_975 = mu + p
return mu, np.nan*mu, p_025, p_975 return mu, np.nan*mu, p_025, p_975
def samples(self, gp):
"""
Returns a set of samples of observations based on a given value of the latent variable.
:param size: number of samples to compute
:param gp: latent variable
"""
orig_shape = gp.shape
gp = gp.flatten()
f = self.gp_link.transf(gp)
#student_t_samples = stats.t.rvs(self.v, loc=f,
#scale=np.sqrt(self.sigma2),
#size=(num_test_points, num_y_samples, num_f_samples))
#Ysim = np.array([np.random.binomial(1,self.gp_link.transf(gpj),size=1) for gpj in gp])
return Ysim.reshape(orig_shape)

View file

@ -83,6 +83,7 @@ class TestNoiseModels(object):
self.Y = (np.sin(self.X[:, 0]*2*np.pi) + noise)[:, None] self.Y = (np.sin(self.X[:, 0]*2*np.pi) + noise)[:, None]
self.f = np.random.rand(self.N, 1) self.f = np.random.rand(self.N, 1)
self.binary_Y = np.asarray(np.random.rand(self.N) > 0.5, dtype=np.int)[:, None] self.binary_Y = np.asarray(np.random.rand(self.N) > 0.5, dtype=np.int)[:, None]
self.positive_Y = np.exp(self.Y.copy())
self.var = 0.2 self.var = 0.2
@ -216,6 +217,12 @@ class TestNoiseModels(object):
"laplace": True, "laplace": True,
"Y": self.binary_Y, "Y": self.binary_Y,
"ep": True "ep": True
},
"Exponential_default": {
"model": GPy.likelihoods.exponential(),
"link_f_constraints": [constrain_positive],
"Y": self.positive_Y,
"laplace": True,
} }
} }