Reimplemented gradients for exponential, seems to work for

laplace now, needs a visual test though
This commit is contained in:
Alan Saul 2013-10-25 12:21:11 +01:00
parent a46121c430
commit 8ef3625832
5 changed files with 134 additions and 32 deletions

View file

@ -37,7 +37,7 @@ def exponential(gp_link=None):
:param gp_link: a GPy gp_link function
"""
if gp_link is None:
gp_link = noise_models.gp_transformations.Identity()
gp_link = noise_models.gp_transformations.Log_ex_1()
analytical_mean = False
analytical_variance = False

View file

@ -24,24 +24,112 @@ class Exponential(NoiseDistribution):
def _preprocess_values(self,Y):
return Y
def _mass(self,gp,obs):
def pdf_link(self, link_f, y, extra_data=None):
"""
Mass (or density) function
"""
return np.exp(-obs/self.gp_link.transf(gp))/self.gp_link.transf(gp)
Likelihood function given link(f)
def _nlog_mass(self,gp,obs):
"""
Negative logarithm of the un-normalized distribution: factors that are not a function of gp are omitted
"""
return obs/self.gp_link.transf(gp) + np.log(self.gp_link.transf(gp))
.. math::
p(y_{i}|\\lambda(f_{i})) = \\lambda(f_{i})\\exp (-y\\lambda(f_{i}))
def _dnlog_mass_dgp(self,gp,obs):
return ( 1./self.gp_link.transf(gp) - obs/self.gp_link.transf(gp)**2) * self.gp_link.dtransf_df(gp)
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in exponential distribution
:returns: likelihood evaluated for this point
:rtype: float
"""
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
return np.exp(np.sum(np.log(link_f*np.exp(-y*link_f))))
#return np.exp(np.sum(-y/link_f - np.log(link_f) ))
def _d2nlog_mass_dgp2(self,gp,obs):
fgp = self.gp_link.transf(gp)
return (2*obs/fgp**3 - 1./fgp**2) * self.gp_link.dtransf_df(gp)**2 + ( 1./fgp - obs/fgp**2) * self.gp_link.d2transf_df2(gp)
def logpdf_link(self, link_f, y, extra_data=None):
"""
Log Likelihood Function given link(f)
.. math::
\\ln p(y_{i}|\lambda(f_{i})) = \\ln \\lambda(f_{i}) - y_{i}\\lambda(f_{i})
:param link_f: latent variables (link(f))
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in exponential distribution
:returns: likelihood evaluated for this point
:rtype: float
"""
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
logpdf_link = np.sum(np.log(link_f) - y*link_f)
#logpdf_link = np.sum(-np.log(link_f) - y/link_f)
return logpdf_link
def dlogpdf_dlink(self, link_f, y, extra_data=None):
"""
Gradient of the log likelihood function at y, given link(f) w.r.t link(f)
.. math::
\\frac{d \\ln p(y_{i}|\lambda(f_{i}))}{d\\lambda(f)} = \\frac{1}{\\lambda(f)} - y_{i}
:param link_f: latent variables (f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in exponential distribution
:returns: gradient of likelihood evaluated at points
:rtype: Nx1 array
"""
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
grad = 1./link_f - y
#grad = y/(link_f**2) - 1./link_f
return grad
def d2logpdf_dlink2(self, link_f, y, extra_data=None):
"""
Hessian at y, given link(f), w.r.t link(f)
i.e. second derivative logpdf at y given link(f_i) and link(f_j) w.r.t link(f_i) and link(f_j)
The hessian will be 0 unless i == j
.. math::
\\frac{d^{2} \\ln p(y_{i}|\lambda(f_{i}))}{d^{2}\\lambda(f)} = -\\frac{1}{\\lambda(f_{i})^{2}}
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in exponential distribution
:returns: Diagonal of hessian matrix (second derivative of likelihood evaluated at points f)
:rtype: Nx1 array
.. Note::
Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases
(the distribution for y_i depends only on link(f_i) not on link(f_(j!=i))
"""
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
hess = -1./(link_f**2)
#hess = -2*y/(link_f**3) + 1/(link_f**2)
return hess
def d3logpdf_dlink3(self, link_f, y, extra_data=None):
"""
Third order derivative log-likelihood function at y given link(f) w.r.t link(f)
.. math::
\\frac{d^{3} \\ln p(y_{i}|\lambda(f_{i}))}{d^{3}\\lambda(f)} = \\frac{2}{\\lambda(f_{i})^{3}}
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in exponential distribution
:returns: third derivative of likelihood evaluated at points f
:rtype: Nx1 array
"""
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
d3lik_dlink3 = 2./(link_f**3)
#d3lik_dlink3 = 6*y/(link_f**4) - 2./(link_f**3)
return d3lik_dlink3
def _mean(self,gp):
"""

View file

@ -222,21 +222,12 @@ class NoiseDistribution(object):
raise NotImplementedError
def dlogpdf_link_dtheta(self, link_f, y, extra_data=None):
"""
Need to check if it should even exist by checking length of getparams
"""
raise NotImplementedError
def dlogpdf_dlink_dtheta(self, link_f, y, extra_data=None):
"""
Need to check if it should even exist by checking length of getparams
"""
raise NotImplementedError
def d2logpdf_dlink2_dtheta(self, link_f, y, extra_data=None):
"""
Need to check if it should even exist by checking length of getparams
"""
raise NotImplementedError
def pdf(self, f, y, extra_data=None):

View file

@ -55,7 +55,7 @@ class StudentT(NoiseDistribution):
:returns: likelihood evaluated for this point
:rtype: float
"""
assert np.asarray(link_f).shape == np.asarray(y).shape
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
e = y - link_f
#Careful gamma(big_number) is infinity!
objective = ((np.exp(gammaln((self.v + 1)*0.5) - gammaln(self.v * 0.5))
@ -80,7 +80,7 @@ class StudentT(NoiseDistribution):
:rtype: float
"""
assert np.asarray(link_f).shape == np.asarray(y).shape
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
e = y - link_f
objective = (+ gammaln((self.v + 1) * 0.5)
- gammaln(self.v * 0.5)
@ -105,7 +105,7 @@ class StudentT(NoiseDistribution):
:rtype: Nx1 array
"""
assert y.shape == link_f.shape
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
e = y - link_f
grad = ((self.v + 1) * e) / (self.v * self.sigma2 + (e**2))
return grad
@ -131,7 +131,7 @@ class StudentT(NoiseDistribution):
Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases
(the distribution for y_i depends only on link(f_i) not on link(f_(j!=i))
"""
assert y.shape == link_f.shape
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
e = y - link_f
hess = ((self.v + 1)*(e**2 - self.v*self.sigma2)) / ((self.sigma2*self.v + e**2)**2)
return hess
@ -151,7 +151,7 @@ class StudentT(NoiseDistribution):
:returns: third derivative of likelihood evaluated at points f
:rtype: Nx1 array
"""
assert y.shape == link_f.shape
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
e = y - link_f
d3lik_dlink3 = ( -(2*(self.v + 1)*(-e)*(e**2 - 3*self.v*self.sigma2)) /
((e**2 + self.sigma2*self.v)**3)
@ -173,7 +173,7 @@ class StudentT(NoiseDistribution):
:returns: derivative of likelihood evaluated at points f w.r.t variance parameter
:rtype: float
"""
assert y.shape == link_f.shape
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
e = y - link_f
dlogpdf_dvar = self.v*(e**2 - self.sigma2)/(2*self.sigma2*(self.sigma2*self.v + e**2))
return np.sum(dlogpdf_dvar)
@ -193,7 +193,7 @@ class StudentT(NoiseDistribution):
:returns: derivative of likelihood evaluated at points f w.r.t variance parameter
:rtype: Nx1 array
"""
assert y.shape == link_f.shape
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
e = y - link_f
dlogpdf_dlink_dvar = (self.v*(self.v+1)*(-e))/((self.sigma2*self.v + e**2)**2)
return dlogpdf_dlink_dvar
@ -213,7 +213,7 @@ class StudentT(NoiseDistribution):
:returns: derivative of hessian evaluated at points f and f_j w.r.t variance parameter
:rtype: Nx1 array
"""
assert y.shape == link_f.shape
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
e = y - link_f
d2logpdf_dlink2_dvar = ( (self.v*(self.v+1)*(self.sigma2*self.v - 3*(e**2)))
/ ((self.sigma2*self.v + (e**2))**3)
@ -314,3 +314,19 @@ class StudentT(NoiseDistribution):
p_025 = mu - p
p_975 = mu + p
return mu, np.nan*mu, p_025, p_975
def samples(self, gp):
"""
Returns a set of samples of observations based on a given value of the latent variable.
:param size: number of samples to compute
:param gp: latent variable
"""
orig_shape = gp.shape
gp = gp.flatten()
f = self.gp_link.transf(gp)
#student_t_samples = stats.t.rvs(self.v, loc=f,
#scale=np.sqrt(self.sigma2),
#size=(num_test_points, num_y_samples, num_f_samples))
#Ysim = np.array([np.random.binomial(1,self.gp_link.transf(gpj),size=1) for gpj in gp])
return Ysim.reshape(orig_shape)

View file

@ -83,6 +83,7 @@ class TestNoiseModels(object):
self.Y = (np.sin(self.X[:, 0]*2*np.pi) + noise)[:, None]
self.f = np.random.rand(self.N, 1)
self.binary_Y = np.asarray(np.random.rand(self.N) > 0.5, dtype=np.int)[:, None]
self.positive_Y = np.exp(self.Y.copy())
self.var = 0.2
@ -216,6 +217,12 @@ class TestNoiseModels(object):
"laplace": True,
"Y": self.binary_Y,
"ep": True
},
"Exponential_default": {
"model": GPy.likelihoods.exponential(),
"link_f_constraints": [constrain_positive],
"Y": self.positive_Y,
"laplace": True,
}
}