Mirror of https://github.com/SheffieldML/GPy.git
Adding weibull likelihood; requires 'extra_data' to be passed to the likelihood, i.e. the censoring information
parent 65481d7a73
commit 9bbb11b825
2 changed files with 104 additions and 19 deletions
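The change threads an optional `extra_data` argument from the Laplace approximation through every likelihood-function call, so survival likelihoods can see their censoring indicators. A minimal sketch of the intended call pattern (not part of the commit; both import paths are assumptions, and `weibull_survival` is the class added in the second file below):

    import numpy as np
    from GPy.likelihoods.likelihood_functions import weibull_survival  # assumed path
    from GPy.inference.laplace import Laplace                          # assumed path

    y = np.array([[0.7], [2.3], [4.1]])    # observed event/censoring times
    censored = np.array([1.0, 0.0, 1.0])   # 1 = censored, 0 = event observed

    lik_fn = weibull_survival(shape=1.5, scale=1.0)
    lik = Laplace(y, lik_fn, extra_data=censored, rasm=True)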
@@ -30,7 +30,7 @@ def pddet(A):
 class Laplace(likelihood):
     """Laplace approximation to a posterior"""
 
-    def __init__(self, data, likelihood_function, rasm=True):
+    def __init__(self, data, likelihood_function, extra_data=None, rasm=True):
         """
         Laplace Approximation
 
@@ -44,13 +44,15 @@ class Laplace(likelihood):
         Arguments
         ---------
 
-        :data: @todo
+        :data: array of data the likelihood function is approximating
         :likelihood_function: likelihood function - subclass of likelihood_function
+        :extra_data: additional data used by some likelihood functions, for example survival likelihoods need censoring data
         :rasm: Flag of whether to use rasmussens numerically stable mode finding or simple ncg optimisation
 
         """
         self.data = data
         self.likelihood_function = likelihood_function
+        self.extra_data = extra_data
         self.rasm = rasm
 
         #Inital values
@@ -179,7 +181,7 @@ class Laplace(likelihood):
             self.f_hat = self.ncg_mode(K)
 
         #At this point get the hessian matrix
-        self.W = -np.diag(self.likelihood_function.link_hess(self.data, self.f_hat))
+        self.W = -np.diag(self.likelihood_function.link_hess(self.data, self.f_hat, extra_data=self.extra_data))
 
         if not self.likelihood_function.log_concave:
             self.W[self.W < 0] = 1e-6  # FIXME-HACK: This is a hack since GPy can't handle negative variances which can occur
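(For context, not part of the diff: $W$ is the negated Hessian of the log likelihood at the mode, $$W = -\nabla\nabla_{f} \ln p(y|\hat{f}),$$ and the later Cholesky factorisation of $B = I + W^{1/2}KW^{1/2}$ needs the elementwise square root $W^{1/2}$ to be real. A non-log-concave likelihood such as the Student-t can put negative entries on that diagonal, which is what the 1e-6 clamp papers over.)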
@@ -194,7 +196,7 @@ class Laplace(likelihood):
         Ki_W_i = self.K - mdot(self.K, self.W_12, self.Bi, self.W_12, self.K)
         self.ln_Ki_W_i_det = np.linalg.det(Ki_W_i)
 
-        b = np.dot(self.W, self.f_hat) + self.likelihood_function.link_grad(self.data, self.f_hat)[:, None]
+        b = np.dot(self.W, self.f_hat) + self.likelihood_function.link_grad(self.data, self.f_hat, extra_data=self.extra_data)[:, None]
         solve_chol = cho_solve((self.B_chol, True), mdot(self.W_12, (K, b)))
         a = b - mdot(self.W_12, solve_chol)
         self.f_Ki_f = np.dot(self.f_hat.T, a)
@@ -203,7 +205,7 @@ class Laplace(likelihood):
         self.ln_z_hat = (- 0.5*self.f_Ki_f
                          - 0.5*self.ln_K_det
                          + 0.5*self.ln_Ki_W_i_det
-                         + self.likelihood_function.link_function(self.data, self.f_hat)
+                         + self.likelihood_function.link_function(self.data, self.f_hat, extra_data=self.extra_data)
                          )
 
         return self._compute_GP_variables()
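The four terms assemble the standard Laplace approximation to the log marginal likelihood (my restatement for reference, not part of the commit),

$$\ln \hat{Z} \simeq -\tfrac{1}{2}\hat{f}^{\top}K^{-1}\hat{f} - \tfrac{1}{2}\ln|K| + \tfrac{1}{2}\ln|(K^{-1}+W)^{-1}| + \ln p(y|\hat{f}).$$

Note that, as written in the hunk above, `ln_Ki_W_i_det` is assigned `np.linalg.det(Ki_W_i)` rather than its log, despite the name; whether the log is taken elsewhere is not visible in this diff.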
@@ -236,16 +238,16 @@ class Laplace(likelihood):
         #FIXME: Can we get rid of this horrible reshaping?
         #ONLY WORKS FOR 1D DATA
         def obj(f):
-            res = -1 * (self.likelihood_function.link_function(self.data[:, 0], f) - 0.5 * np.dot(f.T, np.dot(self.Ki, f))
+            res = -1 * (self.likelihood_function.link_function(self.data[:, 0], f, extra_data=self.extra_data) - 0.5 * np.dot(f.T, np.dot(self.Ki, f))
                         + self.NORMAL_CONST)
             return float(res)
 
         def obj_grad(f):
-            res = -1 * (self.likelihood_function.link_grad(self.data[:, 0], f) - np.dot(self.Ki, f))
+            res = -1 * (self.likelihood_function.link_grad(self.data[:, 0], f, extra_data=self.extra_data) - np.dot(self.Ki, f))
             return np.squeeze(res)
 
         def obj_hess(f):
-            res = -1 * (--np.diag(self.likelihood_function.link_hess(self.data[:, 0], f)) - self.Ki)
+            res = -1 * (--np.diag(self.likelihood_function.link_hess(self.data[:, 0], f, extra_data=self.extra_data)) - self.Ki)
             return np.squeeze(res)
 
         f_hat = sp.optimize.fmin_ncg(obj, f, fprime=obj_grad, fhess=obj_hess)
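For reference (not from the commit): `obj`, `obj_grad` and `obj_hess` hand `fmin_ncg` the negated unnormalised log posterior and its derivatives,

$$\psi(f) = -\big(\ln p(y|f) - \tfrac{1}{2}f^{\top}K^{-1}f + \text{const}\big), \qquad \nabla\psi = -\big(\nabla\ln p(y|f) - K^{-1}f\big), \qquad \nabla\nabla\psi = W + K^{-1},$$

so the returned `f_hat` is the posterior mode. The double negation `--np.diag(...)` in `obj_hess` is equivalent to `+np.diag(...)`.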
@@ -267,7 +269,7 @@ class Laplace(likelihood):
 
         def obj(a, f):
             #Careful of shape of data!
-            return -0.5*np.dot(a.T, f) + self.likelihood_function.link_function(self.data, f)
+            return -0.5*np.dot(a.T, f) + self.likelihood_function.link_function(self.data, f, extra_data=self.extra_data)
 
         difference = np.inf
         epsilon = 1e-6
@@ -276,7 +278,7 @@ class Laplace(likelihood):
         i = 0
         while difference > epsilon and i < MAX_ITER and rs < MAX_RESTART:
             f_old = f.copy()
-            W = -np.diag(self.likelihood_function.link_hess(self.data, f))
+            W = -np.diag(self.likelihood_function.link_hess(self.data, f, extra_data=self.extra_data))
             if not self.likelihood_function.log_concave:
                 W[W < 0] = 1e-6  # FIXME-HACK: This is a hack since GPy can't handle negative variances which can occur
                 # If the likelihood is non-log-concave. We wan't to say that there is a negative variance
@@ -285,7 +287,7 @@ class Laplace(likelihood):
             B, L, W_12 = self._compute_B_statistics(K, W)
 
             W_f = np.dot(W, f)
-            grad = self.likelihood_function.link_grad(self.data, f)[:, None]
+            grad = self.likelihood_function.link_grad(self.data, f, extra_data=self.extra_data)[:, None]
             #Find K_i_f
             b = W_f + grad
 
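This is the numerically stable Newton step of Rasmussen and Williams (2006), Algorithm 3.1, restated here for reference (not part of the commit): with $B = I + W^{1/2}KW^{1/2}$,

$$b = Wf + \nabla\ln p(y|f), \qquad a = b - W^{1/2}B^{-1}W^{1/2}Kb, \qquad f_{\mathrm{new}} = Ka,$$

which avoids inverting $K$ directly and keeps the solve well conditioned through the Cholesky factor of $B$.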
@@ -4,6 +4,7 @@ import numpy as np
 from GPy.likelihoods.likelihood_functions import likelihood_function
 from scipy import stats
 
+
 class student_t(likelihood_function):
     """Student t likelihood distribution
     For nomanclature see Bayesian Data Analysis 2003 p576
@@ -24,15 +25,16 @@ class student_t(likelihood_function):
         self.log_concave = False
 
     @property
-    def variance(self):
+    def variance(self, extra_data=None):
         return (self.v / float(self.v - 2)) * (self.sigma**2)
 
-    def link_function(self, y, f):
+    def link_function(self, y, f, extra_data=None):
         """link_function $\ln p(y|f)$
         $$\ln p(y_{i}|f_{i}) = \ln \Gamma(\frac{v+1}{2}) - \ln \Gamma(\frac{v}{2})\sqrt{v \pi}\sigma - \frac{v+1}{2}\ln (1 + \frac{1}{v}\left(\frac{y_{i} - f_{i}}{\sigma}\right)^2$$
 
         :y: data
         :f: latent variables f
+        :extra_data: extra_data which is not used in student t distribution
         :returns: float(likelihood evaluated for this point)
 
         """
@@ -49,7 +51,7 @@ class student_t(likelihood_function):
                      )
         return np.sum(objective)
 
-    def link_grad(self, y, f):
+    def link_grad(self, y, f, extra_data=None):
         """
         Gradient of the link function at y, given f w.r.t f
 
@@ -57,6 +59,7 @@ class student_t(likelihood_function):
 
         :y: data
         :f: latent variables f
+        :extra_data: extra_data which is not used in student t distribution
         :returns: gradient of likelihood evaluated at points
 
         """
@@ -67,17 +70,18 @@ class student_t(likelihood_function):
         grad = ((self.v + 1) * e) / (self.v * (self.sigma**2) + (e**2))
         return np.squeeze(grad)
 
-    def link_hess(self, y, f):
+    def link_hess(self, y, f, extra_data=None):
         """
         Hessian at this point (if we are only looking at the link function not the prior) the hessian will be 0 unless i == j
         i.e. second derivative link_function at y given f f_j w.r.t f and f_j
 
-        Will return diaganol of hessian, since every where else it is 0
+        Will return diagonal of hessian, since every where else it is 0
 
         $$\frac{d^{2}p(y_{i}|f_{i})}{df^{2}} = \frac{(v + 1)(y - f)}{v \sigma^{2} + (y_{i} - f_{i})^{2}}$$
 
         :y: data
         :f: latent variables f
+        :extra_data: extra_data which is not used in student t distribution
         :returns: array which is diagonal of covariance matrix (second derivative of likelihood evaluated at points)
         """
         y = np.squeeze(y)
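A check on the docstring above (my derivation, not part of the commit): with $e_{i} = y_{i} - f_{i}$, the displayed formula is actually the first derivative; differentiating once more gives the diagonal Hessian of the Student-t log likelihood,

$$\frac{\partial}{\partial f_{i}}\ln p(y_{i}|f_{i}) = \frac{(v+1)e_{i}}{v\sigma^{2} + e_{i}^{2}}, \qquad \frac{\partial^{2}}{\partial f_{i}^{2}}\ln p(y_{i}|f_{i}) = \frac{(v+1)(e_{i}^{2} - v\sigma^{2})}{(v\sigma^{2} + e_{i}^{2})^{2}},$$

which changes sign at $e_{i}^{2} = v\sigma^{2}$ — the non-log-concavity that the clamping hack in the Laplace code guards against.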
@@ -139,7 +143,7 @@ class student_t(likelihood_function):
                                           #size=(num_f_samples, num_y_samples))
         #print student_t_samples.shape
 
-        student_t_samples = stats.t.rvs(self.v, loc=student_t_means[:,None],
+        student_t_samples = stats.t.rvs(self.v, loc=student_t_means[:, None],
                                         scale=self.sigma,
                                         size=(num_test_points, num_y_samples, num_f_samples))
         student_t_samples = np.reshape(student_t_samples,
@@ -152,7 +156,7 @@ class student_t(likelihood_function):
         ##Alernenately we could sample from int p(y|f*)p(f*|x*) df*
         def t_gaussian(f, mu, var):
             return (((gamma((self.v+1)*0.5)) / (gamma(self.v*0.5)*self.sigma*np.sqrt(self.v*np.pi))) * ((1+(1/self.v)*(((mu-f)/self.sigma)**2))**(-(self.v+1)*0.5))
-                    * ((1/(np.sqrt(2*np.pi*var)))*np.exp(-(1/(2*var)) *((mu-f)**2)))
+                    * ((1/(np.sqrt(2*np.pi*var)))*np.exp(-(1/(2*var)) *((mu-f)**2)))
                     )
 
         def t_gauss_int(mu, var):
@@ -167,4 +171,83 @@ class student_t(likelihood_function):
         p = vec_t_gauss_int(mu, var)
         p_025 = mu - p
         p_975 = mu + p
+        import ipdb; ipdb.set_trace()  ### XXX BREAKPOINT
         return mu, np.nan*mu, p_025, p_975
+
+
+class weibull_survival(likelihood_function):
+    """Weibull t likelihood distribution for survival analysis with censoring
+    For nomanclature see Bayesian Survival Analysis
+
+    Laplace:
+    Needs functions to calculate
+    ln p(yi|fi)
+    dln p(yi|fi)_dfi
+    d2ln p(yi|fi)_d2fifj
+    """
+    def __init__(self, shape, scale):
+        self.shape = shape
+        self.scale = scale
+
+        #FIXME: This should be in the superclass
+        self.log_concave = True
+
+    def link_function(self, y, f, extra_data=None):
+        """
+        link_function $\ln p(y|f)$, i.e. log likelihood
+
+        $$\ln p(y|f) = v_{i}(\ln \alpha + (\alpha - 1)\ln y_{i} + f_{i}) - y_{i}^{\alpha}\exp(f_{i})$$
+
+        :y: time of event data
+        :f: latent variables f
+        :extra_data: the censoring indicator, 1 for censored, 0 for not
+        :returns: float(likelihood evaluated for this point)
+
+        """
+        y = np.squeeze(y)
+        f = np.squeeze(f)
+        assert y.shape == f.shape
+
+        v = extra_data
+        objective = v*(np.log(self.shape) + (self.shape - 1)*np.log(y) + f) - (y**self.shape)*np.exp(f)  # FIXME: CHECK THIS WITH BOOK, wheres scale?
+        return np.sum(objective)
+
+    def link_grad(self, y, f, extra_data=None):
+        """
+        Gradient of the link function at y, given f w.r.t f
+
+        $$\frac{d}{df} \ln p(y_{i}|f_{i}) = v_{i} - y_{i}\exp(f_{i})$$
+
+        :y: data
+        :f: latent variables f
+        :extra_data: the censoring indicator, 1 for censored, 0 for not
+        :returns: gradient of likelihood evaluated at points
+
+        """
+        y = np.squeeze(y)
+        f = np.squeeze(f)
+        assert y.shape == f.shape
+
+        v = extra_data
+        grad = v - (y**self.shape)*np.exp(f)
+        return np.squeeze(grad)
+
+    def link_hess(self, y, f, extra_data=None):
+        """
+        Hessian at this point (if we are only looking at the link function not the prior) the hessian will be 0 unless i == j
+        i.e. second derivative link_function at y given f f_j w.r.t f and f_j
+
+        Will return diagonal of hessian, since every where else it is 0
+
+        $$\frac{d^{2}p(y_{i}|f_{i})}{df^{2}} = \frac{(v + 1)(y - f)}{v \sigma^{2} + (y_{i} - f_{i})^{2}}$$
+
+        :y: data
+        :f: latent variables f
+        :extra_data: extra_data which is not used hessian
+        :returns: array which is diagonal of covariance matrix (second derivative of likelihood evaluated at points)
+        """
+        y = np.squeeze(y)
+        f = np.squeeze(f)
+        assert y.shape == f.shape
+
+        hess = (y**self.shape)*np.exp(f)
+        return np.squeeze(hess)
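For the Weibull log likelihood just added, $\ln p(y_{i}|f_{i}) = v_{i}(\ln\alpha + (\alpha - 1)\ln y_{i} + f_{i}) - y_{i}^{\alpha}e^{f_{i}}$, the derivatives work out to (my derivation, not part of the commit)

$$\frac{\partial}{\partial f_{i}}\ln p(y_{i}|f_{i}) = v_{i} - y_{i}^{\alpha}e^{f_{i}}, \qquad \frac{\partial^{2}}{\partial f_{i}^{2}}\ln p(y_{i}|f_{i}) = -y_{i}^{\alpha}e^{f_{i}}.$$

The first matches `link_grad` (note the docstring drops the $y_{i}^{\alpha}$ exponent), while `link_hess` returns the positive quantity $y_{i}^{\alpha}e^{f_{i}}$, i.e. the negated second derivative; since `Laplace` computes `W = -np.diag(link_hess(...))`, this sign convention is worth double-checking against `student_t`, whose `link_hess` returns the second derivative itself. A quick finite-difference sanity check of the gradient (a sketch under assumptions: the import path is hypothetical, the class is as shown in this diff):

    import numpy as np
    from GPy.likelihoods.likelihood_functions import weibull_survival  # assumed path

    lik = weibull_survival(shape=1.5, scale=1.0)
    y = np.array([0.7, 2.3, 4.1])    # event/censoring times
    v = np.array([1.0, 0.0, 1.0])    # censoring indicators, passed as extra_data
    f = np.zeros(3)                  # latent function values
    eps = 1e-6

    grad = lik.link_grad(y, f, extra_data=v)
    for i in range(3):
        f_hi, f_lo = f.copy(), f.copy()
        f_hi[i] += eps
        f_lo[i] -= eps
        # central difference of the summed log likelihood w.r.t. f[i]
        fd = (lik.link_function(y, f_hi, extra_data=v)
              - lik.link_function(y, f_lo, extra_data=v)) / (2 * eps)
        print(fd, grad[i])  # the two columns should agree to ~1e-6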