diff --git a/GPy/examples/laplace_approximations.py b/GPy/examples/laplace_approximations.py index 07801150..5d1c1224 100644 --- a/GPy/examples/laplace_approximations.py +++ b/GPy/examples/laplace_approximations.py @@ -164,8 +164,8 @@ def student_t_approx(): ###with a student t distribution, since it has heavy tails it should work well - ###likelihood_functions = student_t(deg_free, sigma=real_var) - ###lap = Laplace(Y, likelihood_functions) + ###likelihood_function = student_t(deg_free, sigma=real_var) + ###lap = Laplace(Y, likelihood_function) ###cov = kernel.K(X) ###lap.fit_full(cov) diff --git a/GPy/likelihoods/likelihood_function.py b/GPy/likelihoods/likelihood_function.py deleted file mode 100644 index f14faf33..00000000 --- a/GPy/likelihoods/likelihood_function.py +++ /dev/null @@ -1,253 +0,0 @@ -from scipy.special import gammaln, gamma -from scipy import integrate -import numpy as np -from GPy.likelihoods.likelihood_functions import likelihood_function -from scipy import stats - - -class student_t(likelihood_function): - """Student t likelihood distribution - For nomanclature see Bayesian Data Analysis 2003 p576 - - $$\ln p(y_{i}|f_{i}) = \ln \Gamma(\frac{v+1}{2}) - \ln \Gamma(\frac{v}{2})\sqrt{v \pi}\sigma - \frac{v+1}{2}\ln (1 + \frac{1}{v}\left(\frac{y_{i} - f_{i}}{\sigma}\right)^2)$$ - - Laplace: - Needs functions to calculate - ln p(yi|fi) - dln p(yi|fi)_dfi - d2ln p(yi|fi)_d2fifj - """ - def __init__(self, deg_free, sigma=2): - self.v = deg_free - self.sigma = sigma - - #FIXME: This should be in the superclass - self.log_concave = False - - @property - def variance(self, extra_data=None): - return (self.v / float(self.v - 2)) * (self.sigma**2) - - def link_function(self, y, f, extra_data=None): - """link_function $\ln p(y|f)$ - $$\ln p(y_{i}|f_{i}) = \ln \Gamma(\frac{v+1}{2}) - \ln \Gamma(\frac{v}{2})\sqrt{v \pi}\sigma - \frac{v+1}{2}\ln (1 + \frac{1}{v}\left(\frac{y_{i} - f_{i}}{\sigma}\right)^2$$ - - :y: data - :f: latent variables f - :extra_data: 
extra_data which is not used in student t distribution - :returns: float(likelihood evaluated for this point) - - """ - y = np.squeeze(y) - f = np.squeeze(f) - assert y.shape == f.shape - - e = y - f - objective = (gammaln((self.v + 1) * 0.5) - - gammaln(self.v * 0.5) - + np.log(self.sigma * np.sqrt(self.v * np.pi)) - - (self.v + 1) * 0.5 - * np.log(1 + ((e**2 / self.sigma**2) / self.v)) - ) - return np.sum(objective) - - def link_grad(self, y, f, extra_data=None): - """ - Gradient of the link function at y, given f w.r.t f - - $$\frac{d}{df}p(y_{i}|f_{i}) = \frac{(v + 1)(y - f)}{v \sigma^{2} + (y_{i} - f_{i})^{2}}$$ - - :y: data - :f: latent variables f - :extra_data: extra_data which is not used in student t distribution - :returns: gradient of likelihood evaluated at points - - """ - y = np.squeeze(y) - f = np.squeeze(f) - assert y.shape == f.shape - e = y - f - grad = ((self.v + 1) * e) / (self.v * (self.sigma**2) + (e**2)) - return np.squeeze(grad) - - def link_hess(self, y, f, extra_data=None): - """ - Hessian at this point (if we are only looking at the link function not the prior) the hessian will be 0 unless i == j - i.e. 
second derivative link_function at y given f f_j w.r.t f and f_j - - Will return diagonal of hessian, since every where else it is 0 - - $$\frac{d^{2}p(y_{i}|f_{i})}{df^{2}} = \frac{(v + 1)(y - f)}{v \sigma^{2} + (y_{i} - f_{i})^{2}}$$ - - :y: data - :f: latent variables f - :extra_data: extra_data which is not used in student t distribution - :returns: array which is diagonal of covariance matrix (second derivative of likelihood evaluated at points) - """ - y = np.squeeze(y) - f = np.squeeze(f) - assert y.shape == f.shape - e = y - f - hess = ((self.v + 1)*(e**2 - self.v*(self.sigma**2))) / ((((self.sigma**2)*self.v) + e**2)**2) - return np.squeeze(hess) - - def predictive_values(self, mu, var): - """ - Compute mean, and conficence interval (percentiles 5 and 95) of the prediction - - Need to find what the variance is at the latent points for a student t*normal p(y*|f*)p(f*) - (((g((v+1)/2))/(g(v/2)*s*sqrt(v*pi)))*(1+(1/v)*((y-f)/s)^2)^(-(v+1)/2)) - *((1/(s*sqrt(2*pi)))*exp(-(1/(2*(s^2)))*((y-f)^2))) - """ - - #We want the variance around test points y which comes from int p(y*|f*)p(f*) df* - #Var(y*) = Var(E[y*|f*]) + E[Var(y*|f*)] - #Since we are given f* (mu) which is our mean (expected) value of y*|f* then the variance is the variance around this - #Which was also given to us as (var) - #We also need to know the expected variance of y* around samples f*, this is the variance of the student t distribution - #However the variance of the student t distribution is not dependent on f, only on sigma and the degrees of freedom - true_var = var + self.variance - - #Now we have an analytical solution for the variances of the distribution p(y*|f*)p(f*) around our test points but we now - #need the 95 and 5 percentiles. 
- #FIXME: Hack, just pretend p(y*|f*)p(f*) is a gaussian and use the gaussian's percentiles - p_025 = mu - 2.*true_var - p_975 = mu + 2.*true_var - - return mu, np.nan*mu, p_025, p_975 - - def sample_predicted_values(self, mu, var): - """ Experimental sample approches and numerical integration """ - #p_025 = stats.t.ppf(.025, mu) - #p_975 = stats.t.ppf(.975, mu) - - num_test_points = mu.shape[0] - #Each mu is the latent point f* at the test point x*, - #and the var is the gaussian variance at this point - #Take lots of samples from this, so we have lots of possible values - #for latent point f* for each test point x* weighted by how likely we were to pick it - print "Taking %d samples of f*".format(num_test_points) - num_f_samples = 10 - num_y_samples = 10 - student_t_means = np.random.normal(loc=mu, scale=np.sqrt(var), size=(num_test_points, num_f_samples)) - print "Student t means shape: ", student_t_means.shape - - #Now we have lots of f*, lets work out the likelihood of getting this by sampling - #from a student t centred on this point, sample many points from this distribution - #centred on f* - #for test_point, f in enumerate(student_t_means): - #print test_point - #print f.shape - #student_t_samples = stats.t.rvs(self.v, loc=f[:,None], - #scale=self.sigma, - #size=(num_f_samples, num_y_samples)) - #print student_t_samples.shape - - student_t_samples = stats.t.rvs(self.v, loc=student_t_means[:, None], - scale=self.sigma, - size=(num_test_points, num_y_samples, num_f_samples)) - student_t_samples = np.reshape(student_t_samples, - (num_test_points, num_y_samples*num_f_samples)) - - #Now take the 97.5 and 0.25 percentile of these points - p_025 = stats.scoreatpercentile(student_t_samples, .025, axis=1)[:, None] - p_975 = stats.scoreatpercentile(student_t_samples, .975, axis=1)[:, None] - - ##Alernenately we could sample from int p(y|f*)p(f*|x*) df* - def t_gaussian(f, mu, var): - return (((gamma((self.v+1)*0.5)) / 
(gamma(self.v*0.5)*self.sigma*np.sqrt(self.v*np.pi))) * ((1+(1/self.v)*(((mu-f)/self.sigma)**2))**(-(self.v+1)*0.5)) - * ((1/(np.sqrt(2*np.pi*var)))*np.exp(-(1/(2*var)) *((mu-f)**2))) - ) - - def t_gauss_int(mu, var): - print "Mu: ", mu - print "var: ", var - result = integrate.quad(t_gaussian, 0.025, 0.975, args=(mu, var)) - print "Result: ", result - return result[0] - - vec_t_gauss_int = np.vectorize(t_gauss_int) - - p = vec_t_gauss_int(mu, var) - p_025 = mu - p - p_975 = mu + p - return mu, np.nan*mu, p_025, p_975 - - -class weibull_survival(likelihood_function): - """Weibull t likelihood distribution for survival analysis with censoring - For nomanclature see Bayesian Survival Analysis - - Laplace: - Needs functions to calculate - ln p(yi|fi) - dln p(yi|fi)_dfi - d2ln p(yi|fi)_d2fifj - """ - def __init__(self, shape, scale): - self.shape = shape - self.scale = scale - - #FIXME: This should be in the superclass - self.log_concave = True - - def link_function(self, y, f, extra_data=None): - """ - link_function $\ln p(y|f)$, i.e. log likelihood - - $$\ln p(y|f) = v_{i}(\ln \alpha + (\alpha - 1)\ln y_{i} + f_{i}) - y_{i}^{\alpha}\exp(f_{i})$$ - - :y: time of event data - :f: latent variables f - :extra_data: the censoring indicator, 1 for censored, 0 for not - :returns: float(likelihood evaluated for this point) - - """ - y = np.squeeze(y) - f = np.squeeze(f) - assert y.shape == f.shape - - v = extra_data - objective = v*(np.log(self.shape) + (self.shape - 1)*np.log(y) + f) - (y**self.shape)*np.exp(f) # FIXME: CHECK THIS WITH BOOK, wheres scale? 
- return np.sum(objective) - - def link_grad(self, y, f, extra_data=None): - """ - Gradient of the link function at y, given f w.r.t f - - $$\frac{d}{df} \ln p(y_{i}|f_{i}) = v_{i} - y_{i}\exp(f_{i}) - - :y: data - :f: latent variables f - :extra_data: the censoring indicator, 1 for censored, 0 for not - :returns: gradient of likelihood evaluated at points - - """ - y = np.squeeze(y) - f = np.squeeze(f) - assert y.shape == f.shape - - v = extra_data - grad = v - (y**self.shape)*np.exp(f) - return np.squeeze(grad) - - def link_hess(self, y, f, extra_data=None): - """ - Hessian at this point (if we are only looking at the link function not the prior) the hessian will be 0 unless i == j - i.e. second derivative link_function at y given f f_j w.r.t f and f_j - - Will return diagonal of hessian, since every where else it is 0 - - $$\frac{d^{2}p(y_{i}|f_{i})}{df^{2}} = \frac{(v + 1)(y - f)}{v \sigma^{2} + (y_{i} - f_{i})^{2}}$$ - - :y: data - :f: latent variables f - :extra_data: extra_data which is not used hessian - :returns: array which is diagonal of covariance matrix (second derivative of likelihood evaluated at points) - """ - y = np.squeeze(y) - f = np.squeeze(f) - assert y.shape == f.shape - - hess = (y**self.shape)*np.exp(f) - return np.squeeze(hess) diff --git a/GPy/likelihoods/likelihood_functions.py b/GPy/likelihoods/likelihood_functions.py index 4b8e7013..c759e15f 100644 --- a/GPy/likelihoods/likelihood_functions.py +++ b/GPy/likelihoods/likelihood_functions.py @@ -1,12 +1,14 @@ # Copyright (c) 2012, 2013 Ricardo Andrade # Licensed under the BSD 3-clause license (see LICENSE.txt) - import numpy as np -from scipy import stats +from scipy import stats, integrate import scipy as sp import pylab as pb from ..util.plot import gpplot +from scipy.special import gammaln, gamma +#from GPy.likelihoods.likelihood_functions import likelihood_function + class likelihood_function: """ @@ -132,3 +134,251 @@ class Poisson(likelihood_function): p_025 = tmp[:,0] p_975 = 
class student_t(likelihood_function):
    r"""Student-t likelihood, intended for use with the Laplace approximation.

    For nomenclature see Bayesian Data Analysis 2003 p576.

    Per-point log density:

    $$\ln p(y_{i}|f_{i}) = \ln \Gamma(\frac{v+1}{2}) - \ln \Gamma(\frac{v}{2})
      - \ln(\sigma\sqrt{v \pi})
      - \frac{v+1}{2}\ln (1 + \frac{1}{v}\left(\frac{y_{i} - f_{i}}{\sigma}\right)^2)$$

    The Laplace approximation needs three quantities, provided by
    ``link_function``, ``link_grad`` and ``link_hess`` respectively:
        ln p(yi|fi)
        dln p(yi|fi)_dfi
        d2ln p(yi|fi)_d2fifj
    """
    def __init__(self, deg_free, sigma=2):
        """
        :deg_free: degrees of freedom v of the student t distribution
        :sigma: scale parameter of the student t distribution
        """
        self.v = deg_free
        self.sigma = sigma

        #FIXME: This should be in the superclass
        self.log_concave = False

    @property
    def variance(self, extra_data=None):
        """
        Variance of the student t noise, sigma^2 * v / (v - 2).

        The closed form only holds for v > 2. Following the usual convention
        (the one scipy.stats.t uses) the variance is reported as infinite for
        1 < v <= 2 and as NaN (undefined) for v <= 1, instead of returning a
        negative number or dividing by zero.

        :extra_data: unused; kept so the signature stays unchanged
        """
        if self.v > 2:
            return (self.v / float(self.v - 2)) * (self.sigma**2)
        if self.v > 1:
            return np.inf
        return np.nan

    def link_function(self, y, f, extra_data=None):
        r"""link_function $\ln p(y|f)$, summed over all data points

        $$\ln p(y_{i}|f_{i}) = \ln \Gamma(\frac{v+1}{2}) - \ln \Gamma(\frac{v}{2})
          - \ln(\sigma\sqrt{v \pi})
          - \frac{v+1}{2}\ln (1 + \frac{1}{v}\left(\frac{y_{i} - f_{i}}{\sigma}\right)^2)$$

        :y: data
        :f: latent variables f
        :extra_data: extra_data which is not used in student t distribution
        :returns: float(sum of the log likelihood over the points)

        """
        y = np.squeeze(y)
        f = np.squeeze(f)
        assert y.shape == f.shape

        e = y - f
        # The normalising constant sigma*sqrt(v*pi) sits in the denominator of
        # the density, so its log is subtracted. It is constant in f and so
        # does not affect the gradient or the Hessian, only the value.
        objective = (gammaln((self.v + 1) * 0.5)
                     - gammaln(self.v * 0.5)
                     - np.log(self.sigma * np.sqrt(self.v * np.pi))
                     - (self.v + 1) * 0.5
                     * np.log(1 + ((e**2 / self.sigma**2) / self.v))
                     )
        return np.sum(objective)

    def link_grad(self, y, f, extra_data=None):
        r"""
        Gradient of the log likelihood (link function) at y, given f, w.r.t f

        $$\frac{d}{df_{i}}\ln p(y_{i}|f_{i}) = \frac{(v + 1)(y_{i} - f_{i})}{v \sigma^{2} + (y_{i} - f_{i})^{2}}$$

        :y: data
        :f: latent variables f
        :extra_data: extra_data which is not used in student t distribution
        :returns: gradient of the log likelihood evaluated at each point

        """
        y = np.squeeze(y)
        f = np.squeeze(f)
        assert y.shape == f.shape
        e = y - f
        grad = ((self.v + 1) * e) / (self.v * (self.sigma**2) + (e**2))
        return np.squeeze(grad)

    def link_hess(self, y, f, extra_data=None):
        r"""
        Hessian of the log likelihood w.r.t f. Looking only at the link
        function (not the prior) the hessian is 0 unless i == j, so only the
        diagonal is returned.

        $$\frac{d^{2}}{df_{i}^{2}}\ln p(y_{i}|f_{i}) = \frac{(v + 1)((y_{i} - f_{i})^{2} - v\sigma^{2})}{(v \sigma^{2} + (y_{i} - f_{i})^{2})^{2}}$$

        :y: data
        :f: latent variables f
        :extra_data: extra_data which is not used in student t distribution
        :returns: array holding the diagonal of the hessian (second derivative of the log likelihood evaluated at each point)
        """
        y = np.squeeze(y)
        f = np.squeeze(f)
        assert y.shape == f.shape
        e = y - f
        hess = ((self.v + 1)*(e**2 - self.v*(self.sigma**2))) / ((((self.sigma**2)*self.v) + e**2)**2)
        return np.squeeze(hess)

    def predictive_values(self, mu, var):
        """
        Compute the mean and an approximate central 95% interval (2.5 and
        97.5 percentiles) of the prediction.

        :mu: mean of the latent function f* at the test points
        :var: variance of the latent function f* at the test points
        :returns: tuple (mean, NaN placeholder for the variance, 2.5 percentile, 97.5 percentile)
        """

        #We want the variance around test points y which comes from int p(y*|f*)p(f*) df*
        #Var(y*) = Var(E[y*|f*]) + E[Var(y*|f*)]
        #Var(E[y*|f*]) is the variance of f*, which is given to us as (var)
        #E[Var(y*|f*)] is the variance of the student t distribution, which does not
        #depend on f, only on sigma and the degrees of freedom
        true_var = var + self.variance

        #FIXME: Hack, just pretend p(y*|f*)p(f*) is a gaussian and use the gaussian's percentiles
        #A gaussian's central 95% interval is roughly +-2 standard deviations, so
        #the half width is built from the square root of the variance
        half_width = 2. * np.sqrt(true_var)
        p_025 = mu - half_width
        p_975 = mu + half_width

        return mu, np.nan*mu, p_025, p_975

    def sample_predicted_values(self, mu, var, num_f_samples=10, num_y_samples=10):
        """
        Experimental Monte Carlo estimate of the predictive 2.5 and 97.5
        percentiles.

        For every test point, latent values f* are drawn from N(mu, var) and
        observations y* are then drawn from a student t centred on each f*.
        The empirical percentiles of the pooled y* samples are returned.

        The earlier numerical integration step was removed: it integrated the
        density over f in [0.025, 0.975], which is not a quantile, and it
        overwrote the sample based percentiles.

        :mu: mean of the latent function at the test points; reshaped to a column internally (assumes one value per test point -- TODO confirm against caller)
        :var: variance of the latent function at the test points (scalar or one value per test point)
        :num_f_samples: number of latent samples f* per test point
        :num_y_samples: number of student t draws per test point for each latent sample slot
        :returns: tuple (mu, NaN placeholder for the variance, 2.5 percentile, 97.5 percentile); the percentiles are column vectors
        """
        #Work with column vectors so each test point broadcasts against its samples
        f_mean = np.reshape(mu, (-1, 1))
        f_std = np.sqrt(np.reshape(var, (-1, 1)))
        num_test_points = f_mean.shape[0]

        #Each mu is the latent point f* at the test point x*,
        #and the var is the gaussian variance at this point
        #Take lots of samples from this, so we have lots of possible values
        #for latent point f* for each test point x*
        print("Taking %d samples of f* at each of %d test points" % (num_f_samples, num_test_points))
        student_t_means = np.random.normal(loc=f_mean, scale=f_std,
                                           size=(num_test_points, num_f_samples))
        print("Student t means shape:  %s" % (student_t_means.shape,))

        #Now we have lots of f*, sample many points from a student t centred on each of them
        student_t_samples = stats.t.rvs(self.v, loc=student_t_means[:, None],
                                        scale=self.sigma,
                                        size=(num_test_points, num_y_samples, num_f_samples))
        student_t_samples = np.reshape(student_t_samples,
                                       (num_test_points, num_y_samples*num_f_samples))

        #Now take the 2.5 and 97.5 percentiles of these points
        #(percentiles are given on a 0-100 scale)
        p_025 = np.percentile(student_t_samples, 2.5, axis=1)[:, None]
        p_975 = np.percentile(student_t_samples, 97.5, axis=1)[:, None]

        return mu, np.nan*mu, p_025, p_975


class weibull_survival(likelihood_function):
    r"""Weibull likelihood for survival analysis with censoring
    For nomenclature see Bayesian Survival Analysis

    Per-point log likelihood, with shape $\alpha$ and indicator $v_{i}$:

    $$\ln p(y_{i}|f_{i}) = v_{i}(\ln \alpha + (\alpha - 1)\ln y_{i} + f_{i}) - y_{i}^{\alpha}\exp(f_{i})$$

    The Laplace approximation needs three quantities, provided by
    ``link_function``, ``link_grad`` and ``link_hess`` respectively:
        ln p(yi|fi)
        dln p(yi|fi)_dfi
        d2ln p(yi|fi)_d2fifj
    """
    def __init__(self, shape, scale):
        """
        :shape: Weibull shape parameter (alpha in the formulas)
        :scale: stored but not used by any method of this class
        """
        self.shape = shape
        self.scale = scale

        #FIXME: This should be in the superclass
        self.log_concave = True

    def _indicator(self, extra_data, y):
        """Return the indicator v squeezed like y; None means every event was observed (v = 1)."""
        if extra_data is None:
            return np.ones_like(y)
        # Squeeze like y and f, otherwise an (n, 1) indicator would broadcast
        # against the squeezed (n,) data into an (n, n) array
        return np.squeeze(extra_data)

    def link_function(self, y, f, extra_data=None):
        r"""
        link_function $\ln p(y|f)$, i.e. log likelihood, summed over all points

        $$\ln p(y_{i}|f_{i}) = v_{i}(\ln \alpha + (\alpha - 1)\ln y_{i} + f_{i}) - y_{i}^{\alpha}\exp(f_{i})$$

        :y: time of event data
        :f: latent variables f
        :extra_data: the indicator v. In the formula v multiplies the terms that only apply to an observed event, so 1 should mean observed and 0 censored. NOTE(review): an earlier docstring stated the opposite polarity -- confirm against callers. None is treated as all ones.
        :returns: float(sum of the log likelihood over the points)

        """
        y = np.squeeze(y)
        f = np.squeeze(f)
        assert y.shape == f.shape

        v = self._indicator(extra_data, y)
        objective = v*(np.log(self.shape) + (self.shape - 1)*np.log(y) + f) - (y**self.shape)*np.exp(f) # FIXME: CHECK THIS WITH BOOK, wheres scale?
        return np.sum(objective)

    def link_grad(self, y, f, extra_data=None):
        r"""
        Gradient of the log likelihood (link function) at y, given f, w.r.t f

        $$\frac{d}{df_{i}} \ln p(y_{i}|f_{i}) = v_{i} - y_{i}^{\alpha}\exp(f_{i})$$

        :y: data
        :f: latent variables f
        :extra_data: the indicator v, see link_function; None is treated as all ones
        :returns: gradient of the log likelihood evaluated at each point

        """
        y = np.squeeze(y)
        f = np.squeeze(f)
        assert y.shape == f.shape

        v = self._indicator(extra_data, y)
        grad = v - (y**self.shape)*np.exp(f)
        return np.squeeze(grad)

    def link_hess(self, y, f, extra_data=None):
        r"""
        Hessian of the log likelihood w.r.t f. Looking only at the link
        function (not the prior) the hessian is 0 unless i == j, so only the
        diagonal is returned.

        $$\frac{d^{2}}{df_{i}^{2}} \ln p(y_{i}|f_{i}) = -y_{i}^{\alpha}\exp(f_{i})$$

        :y: data
        :f: latent variables f
        :extra_data: not used by the hessian
        :returns: array holding the diagonal of the hessian (second derivative of the log likelihood evaluated at each point)
        """
        y = np.squeeze(y)
        f = np.squeeze(f)
        assert y.shape == f.shape

        # Differentiating the gradient v - y^alpha * exp(f) once more in f
        # leaves a negative term
        hess = -(y**self.shape)*np.exp(f)
        return np.squeeze(hess)