diff --git a/python/likelihoods/Laplace.py b/python/likelihoods/Laplace.py index 8ef8fb62..4d94ba0f 100644 --- a/python/likelihoods/Laplace.py +++ b/python/likelihoods/Laplace.py @@ -30,7 +30,7 @@ def pddet(A): class Laplace(likelihood): """Laplace approximation to a posterior""" - def __init__(self, data, likelihood_function, rasm=True): + def __init__(self, data, likelihood_function, extra_data=None, rasm=True): """ Laplace Approximation @@ -44,13 +44,15 @@ class Laplace(likelihood): Arguments --------- - :data: @todo + :data: array of data the likelihood function is approximating :likelihood_function: likelihood function - subclass of likelihood_function + :extra_data: additional data used by some likelihood functions, for example survival likelihoods need censoring data :rasm: Flag of whether to use rasmussens numerically stable mode finding or simple ncg optimisation """ self.data = data self.likelihood_function = likelihood_function + self.extra_data = extra_data self.rasm = rasm #Inital values @@ -179,7 +181,7 @@ class Laplace(likelihood): self.f_hat = self.ncg_mode(K) #At this point get the hessian matrix - self.W = -np.diag(self.likelihood_function.link_hess(self.data, self.f_hat)) + self.W = -np.diag(self.likelihood_function.link_hess(self.data, self.f_hat, extra_data=self.extra_data)) if not self.likelihood_function.log_concave: self.W[self.W < 0] = 1e-6 # FIXME-HACK: This is a hack since GPy can't handle negative variances which can occur @@ -194,7 +196,7 @@ class Laplace(likelihood): Ki_W_i = self.K - mdot(self.K, self.W_12, self.Bi, self.W_12, self.K) self.ln_Ki_W_i_det = np.linalg.det(Ki_W_i) - b = np.dot(self.W, self.f_hat) + self.likelihood_function.link_grad(self.data, self.f_hat)[:, None] + b = np.dot(self.W, self.f_hat) + self.likelihood_function.link_grad(self.data, self.f_hat, extra_data=self.extra_data)[:, None] solve_chol = cho_solve((self.B_chol, True), mdot(self.W_12, (K, b))) a = b - mdot(self.W_12, solve_chol) self.f_Ki_f = 
np.dot(self.f_hat.T, a) @@ -203,7 +205,7 @@ class Laplace(likelihood): self.ln_z_hat = (- 0.5*self.f_Ki_f - 0.5*self.ln_K_det + 0.5*self.ln_Ki_W_i_det - + self.likelihood_function.link_function(self.data, self.f_hat) + + self.likelihood_function.link_function(self.data, self.f_hat, extra_data=self.extra_data) ) return self._compute_GP_variables() @@ -236,16 +238,16 @@ class Laplace(likelihood): #FIXME: Can we get rid of this horrible reshaping? #ONLY WORKS FOR 1D DATA def obj(f): - res = -1 * (self.likelihood_function.link_function(self.data[:, 0], f) - 0.5 * np.dot(f.T, np.dot(self.Ki, f)) + res = -1 * (self.likelihood_function.link_function(self.data[:, 0], f, extra_data=self.extra_data) - 0.5 * np.dot(f.T, np.dot(self.Ki, f)) + self.NORMAL_CONST) return float(res) def obj_grad(f): - res = -1 * (self.likelihood_function.link_grad(self.data[:, 0], f) - np.dot(self.Ki, f)) + res = -1 * (self.likelihood_function.link_grad(self.data[:, 0], f, extra_data=self.extra_data) - np.dot(self.Ki, f)) return np.squeeze(res) def obj_hess(f): - res = -1 * (--np.diag(self.likelihood_function.link_hess(self.data[:, 0], f)) - self.Ki) + res = -1 * (--np.diag(self.likelihood_function.link_hess(self.data[:, 0], f, extra_data=self.extra_data)) - self.Ki) return np.squeeze(res) f_hat = sp.optimize.fmin_ncg(obj, f, fprime=obj_grad, fhess=obj_hess) @@ -267,7 +269,7 @@ class Laplace(likelihood): def obj(a, f): #Careful of shape of data! 
- return -0.5*np.dot(a.T, f) + self.likelihood_function.link_function(self.data, f) + return -0.5*np.dot(a.T, f) + self.likelihood_function.link_function(self.data, f, extra_data=self.extra_data) difference = np.inf epsilon = 1e-6 @@ -276,7 +278,7 @@ class Laplace(likelihood): i = 0 while difference > epsilon and i < MAX_ITER and rs < MAX_RESTART: f_old = f.copy() - W = -np.diag(self.likelihood_function.link_hess(self.data, f)) + W = -np.diag(self.likelihood_function.link_hess(self.data, f, extra_data=self.extra_data)) if not self.likelihood_function.log_concave: W[W < 0] = 1e-6 # FIXME-HACK: This is a hack since GPy can't handle negative variances which can occur # If the likelihood is non-log-concave. We wan't to say that there is a negative variance @@ -285,7 +287,7 @@ class Laplace(likelihood): B, L, W_12 = self._compute_B_statistics(K, W) W_f = np.dot(W, f) - grad = self.likelihood_function.link_grad(self.data, f)[:, None] + grad = self.likelihood_function.link_grad(self.data, f, extra_data=self.extra_data)[:, None] #Find K_i_f b = W_f + grad diff --git a/python/likelihoods/likelihood_function.py b/python/likelihoods/likelihood_function.py index 49174ce7..0d421882 100644 --- a/python/likelihoods/likelihood_function.py +++ b/python/likelihoods/likelihood_function.py @@ -4,6 +4,7 @@ import numpy as np from GPy.likelihoods.likelihood_functions import likelihood_function from scipy import stats + class student_t(likelihood_function): """Student t likelihood distribution For nomanclature see Bayesian Data Analysis 2003 p576 @@ -24,15 +25,16 @@ class student_t(likelihood_function): self.log_concave = False @property - def variance(self): + def variance(self, extra_data=None): return (self.v / float(self.v - 2)) * (self.sigma**2) - def link_function(self, y, f): + def link_function(self, y, f, extra_data=None): """link_function $\ln p(y|f)$ $$\ln p(y_{i}|f_{i}) = \ln \Gamma(\frac{v+1}{2}) - \ln \Gamma(\frac{v}{2})\sqrt{v \pi}\sigma - \frac{v+1}{2}\ln (1 + 
\frac{1}{v}\left(\frac{y_{i} - f_{i}}{\sigma}\right)^2$$ :y: data :f: latent variables f + :extra_data: extra_data which is not used in student t distribution :returns: float(likelihood evaluated for this point) """ @@ -49,7 +51,7 @@ class student_t(likelihood_function): ) return np.sum(objective) - def link_grad(self, y, f): + def link_grad(self, y, f, extra_data=None): """ Gradient of the link function at y, given f w.r.t f @@ -57,6 +59,7 @@ class student_t(likelihood_function): :y: data :f: latent variables f + :extra_data: extra_data which is not used in student t distribution :returns: gradient of likelihood evaluated at points """ @@ -67,17 +70,18 @@ class student_t(likelihood_function): grad = ((self.v + 1) * e) / (self.v * (self.sigma**2) + (e**2)) return np.squeeze(grad) - def link_hess(self, y, f): + def link_hess(self, y, f, extra_data=None): """ Hessian at this point (if we are only looking at the link function not the prior) the hessian will be 0 unless i == j i.e. second derivative link_function at y given f f_j w.r.t f and f_j - Will return diaganol of hessian, since every where else it is 0 + Will return diagonal of hessian, since every where else it is 0 $$\frac{d^{2}p(y_{i}|f_{i})}{df^{2}} = \frac{(v + 1)(y - f)}{v \sigma^{2} + (y_{i} - f_{i})^{2}}$$ :y: data :f: latent variables f + :extra_data: extra_data which is not used in student t distribution :returns: array which is diagonal of covariance matrix (second derivative of likelihood evaluated at points) """ y = np.squeeze(y) @@ -139,7 +143,7 @@ class student_t(likelihood_function): #size=(num_f_samples, num_y_samples)) #print student_t_samples.shape - student_t_samples = stats.t.rvs(self.v, loc=student_t_means[:,None], + student_t_samples = stats.t.rvs(self.v, loc=student_t_means[:, None], scale=self.sigma, size=(num_test_points, num_y_samples, num_f_samples)) student_t_samples = np.reshape(student_t_samples, @@ -152,7 +156,7 @@ class student_t(likelihood_function): ##Alernenately we could 
sample from int p(y|f*)p(f*|x*) df* def t_gaussian(f, mu, var): return (((gamma((self.v+1)*0.5)) / (gamma(self.v*0.5)*self.sigma*np.sqrt(self.v*np.pi))) * ((1+(1/self.v)*(((mu-f)/self.sigma)**2))**(-(self.v+1)*0.5)) - * ((1/(np.sqrt(2*np.pi*var)))*np.exp(-(1/(2*var)) *((mu-f)**2))) + * ((1/(np.sqrt(2*np.pi*var)))*np.exp(-(1/(2*var)) *((mu-f)**2))) ) def t_gauss_int(mu, var): @@ -167,4 +171,83 @@ class student_t(likelihood_function): p = vec_t_gauss_int(mu, var) p_025 = mu - p p_975 = mu + p - import ipdb; ipdb.set_trace() ### XXX BREAKPOINT + return mu, np.nan*mu, p_025, p_975 + + +class weibull_survival(likelihood_function): + """Weibull likelihood distribution for survival analysis with censoring + For nomenclature see Bayesian Survival Analysis + + Laplace: + Needs functions to calculate + ln p(yi|fi) + dln p(yi|fi)_dfi + d2ln p(yi|fi)_d2fifj + """ + def __init__(self, shape, scale): + self.shape = shape + self.scale = scale + + #FIXME: This should be in the superclass + self.log_concave = True + + def link_function(self, y, f, extra_data=None): + """ + link_function $\ln p(y|f)$, i.e. log likelihood + + $$\ln p(y|f) = v_{i}(\ln \alpha + (\alpha - 1)\ln y_{i} + f_{i}) - y_{i}^{\alpha}\exp(f_{i})$$ + + :y: time of event data + :f: latent variables f + :extra_data: the censoring indicator, 1 for an observed event, 0 for censored + :returns: float(likelihood evaluated for this point) + + """ + y = np.squeeze(y) + f = np.squeeze(f) + assert y.shape == f.shape + + v = extra_data + objective = v*(np.log(self.shape) + (self.shape - 1)*np.log(y) + f) - (y**self.shape)*np.exp(f) # FIXME: CHECK THIS WITH BOOK, wheres scale? 
+ return np.sum(objective) + + def link_grad(self, y, f, extra_data=None): + """ + Gradient of the link function at y, given f w.r.t f + + $$\frac{d}{df} \ln p(y_{i}|f_{i}) = v_{i} - y_{i}^{\alpha}\exp(f_{i})$$ + + :y: data + :f: latent variables f + :extra_data: the censoring indicator, 1 for an observed event, 0 for censored + :returns: gradient of likelihood evaluated at points + + """ + y = np.squeeze(y) + f = np.squeeze(f) + assert y.shape == f.shape + + v = extra_data + grad = v - (y**self.shape)*np.exp(f) + return np.squeeze(grad) + + def link_hess(self, y, f, extra_data=None): + """ + Hessian at this point (if we are only looking at the link function not the prior) the hessian will be 0 unless i == j + i.e. second derivative link_function at y given f f_j w.r.t f and f_j + + Will return diagonal of hessian, since everywhere else it is 0 + + $$\frac{d^{2}\ln p(y_{i}|f_{i})}{df^{2}} = -y_{i}^{\alpha}\exp(f_{i})$$ + + :y: data + :f: latent variables f + :extra_data: the censoring indicator, not used in the hessian + :returns: array which is diagonal of covariance matrix (second derivative of likelihood evaluated at points) + """ + y = np.squeeze(y) + f = np.squeeze(f) + assert y.shape == f.shape + + hess = (y**self.shape)*np.exp(f) + return np.squeeze(hess)