mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-05-24 14:15:14 +02:00
Reimplemented gradients for exponential, seems to work for
laplace now, needs a visual test though
This commit is contained in:
parent
a46121c430
commit
8ef3625832
5 changed files with 134 additions and 32 deletions
|
|
@ -37,7 +37,7 @@ def exponential(gp_link=None):
|
||||||
:param gp_link: a GPy gp_link function
|
:param gp_link: a GPy gp_link function
|
||||||
"""
|
"""
|
||||||
if gp_link is None:
|
if gp_link is None:
|
||||||
gp_link = noise_models.gp_transformations.Identity()
|
gp_link = noise_models.gp_transformations.Log_ex_1()
|
||||||
|
|
||||||
analytical_mean = False
|
analytical_mean = False
|
||||||
analytical_variance = False
|
analytical_variance = False
|
||||||
|
|
|
||||||
|
|
@ -24,24 +24,112 @@ class Exponential(NoiseDistribution):
|
||||||
def _preprocess_values(self,Y):
|
def _preprocess_values(self,Y):
|
||||||
return Y
|
return Y
|
||||||
|
|
||||||
def _mass(self,gp,obs):
|
def pdf_link(self, link_f, y, extra_data=None):
|
||||||
"""
|
"""
|
||||||
Mass (or density) function
|
Likelihood function given link(f)
|
||||||
"""
|
|
||||||
return np.exp(-obs/self.gp_link.transf(gp))/self.gp_link.transf(gp)
|
|
||||||
|
|
||||||
def _nlog_mass(self,gp,obs):
|
.. math::
|
||||||
"""
|
p(y_{i}|\\lambda(f_{i})) = \\lambda(f_{i})\\exp (-y\\lambda(f_{i}))
|
||||||
Negative logarithm of the un-normalized distribution: factors that are not a function of gp are omitted
|
|
||||||
"""
|
|
||||||
return obs/self.gp_link.transf(gp) + np.log(self.gp_link.transf(gp))
|
|
||||||
|
|
||||||
def _dnlog_mass_dgp(self,gp,obs):
|
:param link_f: latent variables link(f)
|
||||||
return ( 1./self.gp_link.transf(gp) - obs/self.gp_link.transf(gp)**2) * self.gp_link.dtransf_df(gp)
|
:type link_f: Nx1 array
|
||||||
|
:param y: data
|
||||||
|
:type y: Nx1 array
|
||||||
|
:param extra_data: extra_data which is not used in exponential distribution
|
||||||
|
:returns: likelihood evaluated for this point
|
||||||
|
:rtype: float
|
||||||
|
"""
|
||||||
|
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
|
||||||
|
return np.exp(np.sum(np.log(link_f*np.exp(-y*link_f))))
|
||||||
|
#return np.exp(np.sum(-y/link_f - np.log(link_f) ))
|
||||||
|
|
||||||
def _d2nlog_mass_dgp2(self,gp,obs):
|
def logpdf_link(self, link_f, y, extra_data=None):
|
||||||
fgp = self.gp_link.transf(gp)
|
"""
|
||||||
return (2*obs/fgp**3 - 1./fgp**2) * self.gp_link.dtransf_df(gp)**2 + ( 1./fgp - obs/fgp**2) * self.gp_link.d2transf_df2(gp)
|
Log Likelihood Function given link(f)
|
||||||
|
|
||||||
|
.. math::
|
||||||
|
\\ln p(y_{i}|\lambda(f_{i})) = \\ln \\lambda(f_{i}) - y_{i}\\lambda(f_{i})
|
||||||
|
|
||||||
|
:param link_f: latent variables (link(f))
|
||||||
|
:type link_f: Nx1 array
|
||||||
|
:param y: data
|
||||||
|
:type y: Nx1 array
|
||||||
|
:param extra_data: extra_data which is not used in exponential distribution
|
||||||
|
:returns: likelihood evaluated for this point
|
||||||
|
:rtype: float
|
||||||
|
|
||||||
|
"""
|
||||||
|
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
|
||||||
|
logpdf_link = np.sum(np.log(link_f) - y*link_f)
|
||||||
|
#logpdf_link = np.sum(-np.log(link_f) - y/link_f)
|
||||||
|
return logpdf_link
|
||||||
|
|
||||||
|
def dlogpdf_dlink(self, link_f, y, extra_data=None):
|
||||||
|
"""
|
||||||
|
Gradient of the log likelihood function at y, given link(f) w.r.t link(f)
|
||||||
|
|
||||||
|
.. math::
|
||||||
|
\\frac{d \\ln p(y_{i}|\lambda(f_{i}))}{d\\lambda(f)} = \\frac{1}{\\lambda(f)} - y_{i}
|
||||||
|
|
||||||
|
:param link_f: latent variables (f)
|
||||||
|
:type link_f: Nx1 array
|
||||||
|
:param y: data
|
||||||
|
:type y: Nx1 array
|
||||||
|
:param extra_data: extra_data which is not used in exponential distribution
|
||||||
|
:returns: gradient of likelihood evaluated at points
|
||||||
|
:rtype: Nx1 array
|
||||||
|
|
||||||
|
"""
|
||||||
|
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
|
||||||
|
grad = 1./link_f - y
|
||||||
|
#grad = y/(link_f**2) - 1./link_f
|
||||||
|
return grad
|
||||||
|
|
||||||
|
def d2logpdf_dlink2(self, link_f, y, extra_data=None):
|
||||||
|
"""
|
||||||
|
Hessian at y, given link(f), w.r.t link(f)
|
||||||
|
i.e. second derivative logpdf at y given link(f_i) and link(f_j) w.r.t link(f_i) and link(f_j)
|
||||||
|
The hessian will be 0 unless i == j
|
||||||
|
|
||||||
|
.. math::
|
||||||
|
\\frac{d^{2} \\ln p(y_{i}|\lambda(f_{i}))}{d^{2}\\lambda(f)} = -\\frac{1}{\\lambda(f_{i})^{2}}
|
||||||
|
|
||||||
|
:param link_f: latent variables link(f)
|
||||||
|
:type link_f: Nx1 array
|
||||||
|
:param y: data
|
||||||
|
:type y: Nx1 array
|
||||||
|
:param extra_data: extra_data which is not used in exponential distribution
|
||||||
|
:returns: Diagonal of hessian matrix (second derivative of likelihood evaluated at points f)
|
||||||
|
:rtype: Nx1 array
|
||||||
|
|
||||||
|
.. Note::
|
||||||
|
Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases
|
||||||
|
(the distribution for y_i depends only on link(f_i) not on link(f_(j!=i))
|
||||||
|
"""
|
||||||
|
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
|
||||||
|
hess = -1./(link_f**2)
|
||||||
|
#hess = -2*y/(link_f**3) + 1/(link_f**2)
|
||||||
|
return hess
|
||||||
|
|
||||||
|
def d3logpdf_dlink3(self, link_f, y, extra_data=None):
|
||||||
|
"""
|
||||||
|
Third order derivative log-likelihood function at y given link(f) w.r.t link(f)
|
||||||
|
|
||||||
|
.. math::
|
||||||
|
\\frac{d^{3} \\ln p(y_{i}|\lambda(f_{i}))}{d^{3}\\lambda(f)} = \\frac{2}{\\lambda(f_{i})^{3}}
|
||||||
|
|
||||||
|
:param link_f: latent variables link(f)
|
||||||
|
:type link_f: Nx1 array
|
||||||
|
:param y: data
|
||||||
|
:type y: Nx1 array
|
||||||
|
:param extra_data: extra_data which is not used in exponential distribution
|
||||||
|
:returns: third derivative of likelihood evaluated at points f
|
||||||
|
:rtype: Nx1 array
|
||||||
|
"""
|
||||||
|
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
|
||||||
|
d3lik_dlink3 = 2./(link_f**3)
|
||||||
|
#d3lik_dlink3 = 6*y/(link_f**4) - 2./(link_f**3)
|
||||||
|
return d3lik_dlink3
|
||||||
|
|
||||||
def _mean(self,gp):
|
def _mean(self,gp):
|
||||||
"""
|
"""
|
||||||
|
|
|
||||||
|
|
@ -222,21 +222,12 @@ class NoiseDistribution(object):
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
def dlogpdf_link_dtheta(self, link_f, y, extra_data=None):
|
def dlogpdf_link_dtheta(self, link_f, y, extra_data=None):
|
||||||
"""
|
|
||||||
Need to check if it should even exist by checking length of getparams
|
|
||||||
"""
|
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
def dlogpdf_dlink_dtheta(self, link_f, y, extra_data=None):
|
def dlogpdf_dlink_dtheta(self, link_f, y, extra_data=None):
|
||||||
"""
|
|
||||||
Need to check if it should even exist by checking length of getparams
|
|
||||||
"""
|
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
def d2logpdf_dlink2_dtheta(self, link_f, y, extra_data=None):
|
def d2logpdf_dlink2_dtheta(self, link_f, y, extra_data=None):
|
||||||
"""
|
|
||||||
Need to check if it should even exist by checking length of getparams
|
|
||||||
"""
|
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
def pdf(self, f, y, extra_data=None):
|
def pdf(self, f, y, extra_data=None):
|
||||||
|
|
|
||||||
|
|
@ -55,7 +55,7 @@ class StudentT(NoiseDistribution):
|
||||||
:returns: likelihood evaluated for this point
|
:returns: likelihood evaluated for this point
|
||||||
:rtype: float
|
:rtype: float
|
||||||
"""
|
"""
|
||||||
assert np.asarray(link_f).shape == np.asarray(y).shape
|
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
|
||||||
e = y - link_f
|
e = y - link_f
|
||||||
#Careful gamma(big_number) is infinity!
|
#Careful gamma(big_number) is infinity!
|
||||||
objective = ((np.exp(gammaln((self.v + 1)*0.5) - gammaln(self.v * 0.5))
|
objective = ((np.exp(gammaln((self.v + 1)*0.5) - gammaln(self.v * 0.5))
|
||||||
|
|
@ -80,7 +80,7 @@ class StudentT(NoiseDistribution):
|
||||||
:rtype: float
|
:rtype: float
|
||||||
|
|
||||||
"""
|
"""
|
||||||
assert np.asarray(link_f).shape == np.asarray(y).shape
|
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
|
||||||
e = y - link_f
|
e = y - link_f
|
||||||
objective = (+ gammaln((self.v + 1) * 0.5)
|
objective = (+ gammaln((self.v + 1) * 0.5)
|
||||||
- gammaln(self.v * 0.5)
|
- gammaln(self.v * 0.5)
|
||||||
|
|
@ -105,7 +105,7 @@ class StudentT(NoiseDistribution):
|
||||||
:rtype: Nx1 array
|
:rtype: Nx1 array
|
||||||
|
|
||||||
"""
|
"""
|
||||||
assert y.shape == link_f.shape
|
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
|
||||||
e = y - link_f
|
e = y - link_f
|
||||||
grad = ((self.v + 1) * e) / (self.v * self.sigma2 + (e**2))
|
grad = ((self.v + 1) * e) / (self.v * self.sigma2 + (e**2))
|
||||||
return grad
|
return grad
|
||||||
|
|
@ -131,7 +131,7 @@ class StudentT(NoiseDistribution):
|
||||||
Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases
|
Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases
|
||||||
(the distribution for y_i depends only on link(f_i) not on link(f_(j!=i))
|
(the distribution for y_i depends only on link(f_i) not on link(f_(j!=i))
|
||||||
"""
|
"""
|
||||||
assert y.shape == link_f.shape
|
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
|
||||||
e = y - link_f
|
e = y - link_f
|
||||||
hess = ((self.v + 1)*(e**2 - self.v*self.sigma2)) / ((self.sigma2*self.v + e**2)**2)
|
hess = ((self.v + 1)*(e**2 - self.v*self.sigma2)) / ((self.sigma2*self.v + e**2)**2)
|
||||||
return hess
|
return hess
|
||||||
|
|
@ -151,7 +151,7 @@ class StudentT(NoiseDistribution):
|
||||||
:returns: third derivative of likelihood evaluated at points f
|
:returns: third derivative of likelihood evaluated at points f
|
||||||
:rtype: Nx1 array
|
:rtype: Nx1 array
|
||||||
"""
|
"""
|
||||||
assert y.shape == link_f.shape
|
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
|
||||||
e = y - link_f
|
e = y - link_f
|
||||||
d3lik_dlink3 = ( -(2*(self.v + 1)*(-e)*(e**2 - 3*self.v*self.sigma2)) /
|
d3lik_dlink3 = ( -(2*(self.v + 1)*(-e)*(e**2 - 3*self.v*self.sigma2)) /
|
||||||
((e**2 + self.sigma2*self.v)**3)
|
((e**2 + self.sigma2*self.v)**3)
|
||||||
|
|
@ -173,7 +173,7 @@ class StudentT(NoiseDistribution):
|
||||||
:returns: derivative of likelihood evaluated at points f w.r.t variance parameter
|
:returns: derivative of likelihood evaluated at points f w.r.t variance parameter
|
||||||
:rtype: float
|
:rtype: float
|
||||||
"""
|
"""
|
||||||
assert y.shape == link_f.shape
|
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
|
||||||
e = y - link_f
|
e = y - link_f
|
||||||
dlogpdf_dvar = self.v*(e**2 - self.sigma2)/(2*self.sigma2*(self.sigma2*self.v + e**2))
|
dlogpdf_dvar = self.v*(e**2 - self.sigma2)/(2*self.sigma2*(self.sigma2*self.v + e**2))
|
||||||
return np.sum(dlogpdf_dvar)
|
return np.sum(dlogpdf_dvar)
|
||||||
|
|
@ -193,7 +193,7 @@ class StudentT(NoiseDistribution):
|
||||||
:returns: derivative of likelihood evaluated at points f w.r.t variance parameter
|
:returns: derivative of likelihood evaluated at points f w.r.t variance parameter
|
||||||
:rtype: Nx1 array
|
:rtype: Nx1 array
|
||||||
"""
|
"""
|
||||||
assert y.shape == link_f.shape
|
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
|
||||||
e = y - link_f
|
e = y - link_f
|
||||||
dlogpdf_dlink_dvar = (self.v*(self.v+1)*(-e))/((self.sigma2*self.v + e**2)**2)
|
dlogpdf_dlink_dvar = (self.v*(self.v+1)*(-e))/((self.sigma2*self.v + e**2)**2)
|
||||||
return dlogpdf_dlink_dvar
|
return dlogpdf_dlink_dvar
|
||||||
|
|
@ -213,7 +213,7 @@ class StudentT(NoiseDistribution):
|
||||||
:returns: derivative of hessian evaluated at points f and f_j w.r.t variance parameter
|
:returns: derivative of hessian evaluated at points f and f_j w.r.t variance parameter
|
||||||
:rtype: Nx1 array
|
:rtype: Nx1 array
|
||||||
"""
|
"""
|
||||||
assert y.shape == link_f.shape
|
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
|
||||||
e = y - link_f
|
e = y - link_f
|
||||||
d2logpdf_dlink2_dvar = ( (self.v*(self.v+1)*(self.sigma2*self.v - 3*(e**2)))
|
d2logpdf_dlink2_dvar = ( (self.v*(self.v+1)*(self.sigma2*self.v - 3*(e**2)))
|
||||||
/ ((self.sigma2*self.v + (e**2))**3)
|
/ ((self.sigma2*self.v + (e**2))**3)
|
||||||
|
|
@ -314,3 +314,19 @@ class StudentT(NoiseDistribution):
|
||||||
p_025 = mu - p
|
p_025 = mu - p
|
||||||
p_975 = mu + p
|
p_975 = mu + p
|
||||||
return mu, np.nan*mu, p_025, p_975
|
return mu, np.nan*mu, p_025, p_975
|
||||||
|
|
||||||
|
def samples(self, gp):
|
||||||
|
"""
|
||||||
|
Returns a set of samples of observations based on a given value of the latent variable.
|
||||||
|
|
||||||
|
:param size: number of samples to compute
|
||||||
|
:param gp: latent variable
|
||||||
|
"""
|
||||||
|
orig_shape = gp.shape
|
||||||
|
gp = gp.flatten()
|
||||||
|
f = self.gp_link.transf(gp)
|
||||||
|
#student_t_samples = stats.t.rvs(self.v, loc=f,
|
||||||
|
#scale=np.sqrt(self.sigma2),
|
||||||
|
#size=(num_test_points, num_y_samples, num_f_samples))
|
||||||
|
#Ysim = np.array([np.random.binomial(1,self.gp_link.transf(gpj),size=1) for gpj in gp])
|
||||||
|
return Ysim.reshape(orig_shape)
|
||||||
|
|
|
||||||
|
|
@ -83,6 +83,7 @@ class TestNoiseModels(object):
|
||||||
self.Y = (np.sin(self.X[:, 0]*2*np.pi) + noise)[:, None]
|
self.Y = (np.sin(self.X[:, 0]*2*np.pi) + noise)[:, None]
|
||||||
self.f = np.random.rand(self.N, 1)
|
self.f = np.random.rand(self.N, 1)
|
||||||
self.binary_Y = np.asarray(np.random.rand(self.N) > 0.5, dtype=np.int)[:, None]
|
self.binary_Y = np.asarray(np.random.rand(self.N) > 0.5, dtype=np.int)[:, None]
|
||||||
|
self.positive_Y = np.exp(self.Y.copy())
|
||||||
|
|
||||||
self.var = 0.2
|
self.var = 0.2
|
||||||
|
|
||||||
|
|
@ -216,6 +217,12 @@ class TestNoiseModels(object):
|
||||||
"laplace": True,
|
"laplace": True,
|
||||||
"Y": self.binary_Y,
|
"Y": self.binary_Y,
|
||||||
"ep": True
|
"ep": True
|
||||||
|
},
|
||||||
|
"Exponential_default": {
|
||||||
|
"model": GPy.likelihoods.exponential(),
|
||||||
|
"link_f_constraints": [constrain_positive],
|
||||||
|
"Y": self.positive_Y,
|
||||||
|
"laplace": True,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue