GPy/python/likelihoods/likelihood_function.py

from scipy.special import gammaln, gamma
from scipy import integrate
import numpy as np
from GPy.likelihoods.likelihood_functions import likelihood_function
from scipy import stats

class student_t(likelihood_function):
    """Student t likelihood distribution
    For nomanclature see Bayesian Data Analysis 2003 p576

    $$\ln p(y_{i}|f_{i}) = \ln \Gamma(\frac{v+1}{2}) - \ln \Gamma(\frac{v}{2})\sqrt{v \pi}\sigma - \frac{v+1}{2}\ln (1 + \frac{1}{v}\left(\frac{y_{i} - f_{i}}{\sigma}\right)^2$$

    Laplace:
    Needs functions to calculate
    ln p(yi|fi)
    dln p(yi|fi)_dfi
    d2ln p(yi|fi)_d2fifj
    """
    def __init__(self, deg_free, sigma=2):
        self.v = deg_free
        self.sigma = sigma

        #FIXME: This should be in the superclass
        self.log_concave = False

    @property
    def variance(self):
        return (self.v / float(self.v - 2)) * (self.sigma**2)

    def link_function(self, y, f):
        """link_function $\ln p(y|f)$
        $$\ln p(y_{i}|f_{i}) = \ln \Gamma(\frac{v+1}{2}) - \ln \Gamma(\frac{v}{2})\sqrt{v \pi}\sigma - \frac{v+1}{2}\ln (1 + \frac{1}{v}\left(\frac{y_{i} - f_{i}}{\sigma}\right)^2$$

        :y: data
        :f: latent variables f
        :returns: float(likelihood evaluated for this point)

        """
        y = np.squeeze(y)
        f = np.squeeze(f)
        assert y.shape == f.shape

        e = y - f
        objective = (gammaln((self.v + 1) * 0.5)
                     - gammaln(self.v * 0.5)
                     + np.log(self.sigma * np.sqrt(self.v * np.pi))
                     - (self.v + 1) * 0.5
                     * np.log(1 + ((e**2 / self.sigma**2) / self.v))
                     )
        print (e**2).shape
        return np.sum(objective)

    def link_grad(self, y, f):
        """
        Gradient of the link function at y, given f w.r.t f

        $$\frac{d}{df}p(y_{i}|f_{i}) = \frac{(v + 1)(y - f)}{v \sigma^{2} + (y_{i} - f_{i})^{2}}$$

        :y: data
        :f: latent variables f
        :returns: gradient of likelihood evaluated at points

        """
        y = np.squeeze(y)
        f = np.squeeze(f)
        assert y.shape == f.shape
        e = y - f
        grad = ((self.v + 1) * e) / (self.v * (self.sigma**2) + (e**2))
        return np.squeeze(grad)

    def link_hess(self, y, f):
        """
        Hessian at this point (if we are only looking at the link function not the prior) the hessian will be 0 unless i == j
        i.e. second derivative link_function at y given f f_j  w.r.t f and f_j

        Will return diaganol of hessian, since every where else it is 0

        $$\frac{d^{2}p(y_{i}|f_{i})}{df^{2}} = \frac{(v + 1)(y - f)}{v \sigma^{2} + (y_{i} - f_{i})^{2}}$$

        :y: data
        :f: latent variables f
        :returns: array which is diagonal of covariance matrix (second derivative of likelihood evaluated at points)
        """
        y = np.squeeze(y)
        f = np.squeeze(f)
        assert y.shape == f.shape
        e = y - f
        hess = ((self.v + 1)*(e**2 - self.v*(self.sigma**2))) / ((((self.sigma**2)*self.v) + e**2)**2)
        return np.squeeze(hess)

    def predictive_values(self, mu, var):
        """
        Compute  mean, and conficence interval (percentiles 5 and 95) of the prediction

        Need to find what the variance is at the latent points for a student t*normal p(y*|f*)p(f*)
        (((g((v+1)/2))/(g(v/2)*s*sqrt(v*pi)))*(1+(1/v)*((y-f)/s)^2)^(-(v+1)/2))
        *((1/(s*sqrt(2*pi)))*exp(-(1/(2*(s^2)))*((y-f)^2)))
        """

        #We want the variance around test points y which comes from int p(y*|f*)p(f*) df*
        #Var(y*) = Var(E[y*|f*]) + E[Var(y*|f*)]
        #Since we are given f* (mu) which is our mean (expected) value of y*|f* then the variance is the variance around this
        #Which was also given to us as (var)
        #We also need to know the expected variance of y* around samples f*, this is the variance of the student t distribution
        #However the variance of the student t distribution is not dependent on f, only on sigma and the degrees of freedom
        true_var = var + self.variance

        #Now we have an analytical solution for the variances of the distribution p(y*|f*)p(f*) around our test points but we now
        #need the 95 and 5 percentiles.
        #FIXME: Hack, just pretend p(y*|f*)p(f*) is a gaussian and use the gaussian's percentiles
        p_025 = mu - 2.*true_var
        p_975 = mu + 2.*true_var

        return mu, np.nan*mu, p_025, p_975

    def sample_predicted_values(self, mu, var):
        """ Experimental sample approches and numerical integration """
        #p_025 = stats.t.ppf(.025, mu)
        #p_975 = stats.t.ppf(.975, mu)

        num_test_points = mu.shape[0]
        #Each mu is the latent point f* at the test point x*,
        #and the var is the gaussian variance at this point
        #Take lots of samples from this, so we have lots of possible values
        #for latent point f* for each test point x* weighted by how likely we were to pick it
        print "Taking %d samples of f*".format(num_test_points)
        num_f_samples = 10
        num_y_samples = 10
        student_t_means = np.random.normal(loc=mu, scale=np.sqrt(var), size=(num_test_points, num_f_samples))
        print "Student t means shape: ", student_t_means.shape

        #Now we have lots of f*, lets work out the likelihood of getting this by sampling
        #from a student t centred on this point, sample many points from this distribution
        #centred on f*
        #for test_point, f in enumerate(student_t_means):
            #print test_point
            #print f.shape
            #student_t_samples = stats.t.rvs(self.v, loc=f[:,None],
                                            #scale=self.sigma,
                                            #size=(num_f_samples, num_y_samples))
            #print student_t_samples.shape

        student_t_samples = stats.t.rvs(self.v, loc=student_t_means[:,None],
                                        scale=self.sigma,
                                        size=(num_test_points, num_y_samples, num_f_samples))
        student_t_samples = np.reshape(student_t_samples,
                                       (num_test_points, num_y_samples*num_f_samples))

        #Now take the 97.5 and 0.25 percentile of these points
        p_025 = stats.scoreatpercentile(student_t_samples, .025, axis=1)[:, None]
        p_975 = stats.scoreatpercentile(student_t_samples, .975, axis=1)[:, None]

        ##Alernenately we could sample from int p(y|f*)p(f*|x*) df*
        def t_gaussian(f, mu, var):
            return (((gamma((self.v+1)*0.5)) / (gamma(self.v*0.5)*self.sigma*np.sqrt(self.v*np.pi))) * ((1+(1/self.v)*(((mu-f)/self.sigma)**2))**(-(self.v+1)*0.5))
                        * ((1/(np.sqrt(2*np.pi*var)))*np.exp(-(1/(2*var)) *((mu-f)**2)))
                    )

        def t_gauss_int(mu, var):
            print "Mu: ", mu
            print "var: ", var
            result = integrate.quad(t_gaussian, 0.025, 0.975, args=(mu, var))
            print "Result: ", result
            return result[0]

        vec_t_gauss_int = np.vectorize(t_gauss_int)

        p = vec_t_gauss_int(mu, var)
        p_025 = mu - p
        p_975 = mu + p
        import ipdb; ipdb.set_trace() ### XXX BREAKPOINT
Working laplace, just needs predictive values 2013-03-28 17:42:42 +00:00			`from scipy.special import gammaln, gamma`
			`from scipy import integrate`
Got most of laplace approximation working 2013-03-13 17:55:41 +00:00			`import numpy as np`
			`from GPy.likelihoods.likelihood_functions import likelihood_function`
Just breaking some things... 2013-03-19 11:47:53 +00:00			`from scipy import stats`
Got most of laplace approximation working 2013-03-13 17:55:41 +00:00
			`class student_t(likelihood_function):`
Initial commit, setting up the laplace approximation for a student t 2013-03-12 17:42:00 +00:00			`"""Student t likelihood distribution`
			`For nomanclature see Bayesian Data Analysis 2003 p576`

Got most of laplace approximation working 2013-03-13 17:55:41 +00:00			`$$\ln p(y_{i}\|f_{i}) = \ln \Gamma(\frac{v+1}{2}) - \ln \Gamma(\frac{v}{2})\sqrt{v \pi}\sigma - \frac{v+1}{2}\ln (1 + \frac{1}{v}\left(\frac{y_{i} - f_{i}}{\sigma}\right)^2$$`
Added some comments 2013-03-13 11:54:33 +00:00
Initial commit, setting up the laplace approximation for a student t 2013-03-12 17:42:00 +00:00			`Laplace:`
			`Needs functions to calculate`
			`ln p(yi\|fi)`
			`dln p(yi\|fi)_dfi`
Got most of laplace approximation working 2013-03-13 17:55:41 +00:00			`d2ln p(yi\|fi)_d2fifj`
Initial commit, setting up the laplace approximation for a student t 2013-03-12 17:42:00 +00:00			`"""`
Trying to 'debug' 2013-03-14 15:30:22 +00:00			`def __init__(self, deg_free, sigma=2):`
Initial commit, setting up the laplace approximation for a student t 2013-03-12 17:42:00 +00:00			`self.v = deg_free`
Trying to 'debug' 2013-03-14 15:30:22 +00:00			`self.sigma = sigma`
Initial commit, setting up the laplace approximation for a student t 2013-03-12 17:42:00 +00:00
Worked out in terms of W, needs gradients implementing 2013-03-19 18:21:57 +00:00			`#FIXME: This should be in the superclass`
			`self.log_concave = False`

Added predicted values for student t, works well 2013-04-02 12:33:01 +01:00			`@property`
			`def variance(self):`
			`return (self.v / float(self.v - 2)) * (self.sigma**2)`

Got most of laplace approximation working 2013-03-13 17:55:41 +00:00			`def link_function(self, y, f):`
			`"""link_function $\ln p(y\|f)$`
			`$$\ln p(y_{i}\|f_{i}) = \ln \Gamma(\frac{v+1}{2}) - \ln \Gamma(\frac{v}{2})\sqrt{v \pi}\sigma - \frac{v+1}{2}\ln (1 + \frac{1}{v}\left(\frac{y_{i} - f_{i}}{\sigma}\right)^2$$`
Initial commit, setting up the laplace approximation for a student t 2013-03-12 17:42:00 +00:00
Trying to 'debug' 2013-03-14 15:30:22 +00:00			`:y: data`
			`:f: latent variables f`
Initial commit, setting up the laplace approximation for a student t 2013-03-12 17:42:00 +00:00			`:returns: float(likelihood evaluated for this point)`

			`"""`
Still working on rasmussen, link function needs vectorizing I think 2013-04-02 20:00:31 +01:00			`y = np.squeeze(y)`
			`f = np.squeeze(f)`
following naming convention better, lots of inverses which should be able to get rid of one or two, unsure if it works 2013-03-18 15:59:12 +00:00			`assert y.shape == f.shape`
Still working on rasmussen, link function needs vectorizing I think 2013-04-02 20:00:31 +01:00
Got most of laplace approximation working 2013-03-13 17:55:41 +00:00			`e = y - f`
			`objective = (gammaln((self.v + 1) * 0.5)`
Trying to 'debug' 2013-03-14 15:30:22 +00:00			`- gammaln(self.v * 0.5)`
			`+ np.log(self.sigma * np.sqrt(self.v * np.pi))`
			`- (self.v + 1) * 0.5`
			`* np.log(1 + ((e2 / self.sigma2) / self.v))`
			`)`
Still working on rasmussen, link function needs vectorizing I think 2013-04-02 20:00:31 +01:00			`print (e**2).shape`
Got most of laplace approximation working 2013-03-13 17:55:41 +00:00			`return np.sum(objective)`
Initial commit, setting up the laplace approximation for a student t 2013-03-12 17:42:00 +00:00
Got most of laplace approximation working 2013-03-13 17:55:41 +00:00			`def link_grad(self, y, f):`
			`"""`
			`Gradient of the link function at y, given f w.r.t f`
Initial commit, setting up the laplace approximation for a student t 2013-03-12 17:42:00 +00:00
Got most of laplace approximation working 2013-03-13 17:55:41 +00:00			`$$\frac{d}{df}p(y_{i}\|f_{i}) = \frac{(v + 1)(y - f)}{v \sigma^{2} + (y_{i} - f_{i})^{2}}$$`
Added some comments 2013-03-13 11:54:33 +00:00
Trying to 'debug' 2013-03-14 15:30:22 +00:00			`:y: data`
			`:f: latent variables f`
			`:returns: gradient of likelihood evaluated at points`
Initial commit, setting up the laplace approximation for a student t 2013-03-12 17:42:00 +00:00
			`"""`
Still working on rasmussen, link function needs vectorizing I think 2013-04-02 20:00:31 +01:00			`y = np.squeeze(y)`
			`f = np.squeeze(f)`
following naming convention better, lots of inverses which should be able to get rid of one or two, unsure if it works 2013-03-18 15:59:12 +00:00			`assert y.shape == f.shape`
Got most of laplace approximation working 2013-03-13 17:55:41 +00:00			`e = y - f`
			`grad = ((self.v + 1) * e) / (self.v * (self.sigma2) + (e2))`
Still working on rasmussen, link function needs vectorizing I think 2013-04-02 20:00:31 +01:00			`return np.squeeze(grad)`
Initial commit, setting up the laplace approximation for a student t 2013-03-12 17:42:00 +00:00
Got most of laplace approximation working 2013-03-13 17:55:41 +00:00			`def link_hess(self, y, f):`
			`"""`
			`Hessian at this point (if we are only looking at the link function not the prior) the hessian will be 0 unless i == j`
			`i.e. second derivative link_function at y given f f_j w.r.t f and f_j`
Initial commit, setting up the laplace approximation for a student t 2013-03-12 17:42:00 +00:00
Got most of laplace approximation working 2013-03-13 17:55:41 +00:00			`Will return diaganol of hessian, since every where else it is 0`
Added some comments 2013-03-13 11:54:33 +00:00
Got most of laplace approximation working 2013-03-13 17:55:41 +00:00			`$$\frac{d^{2}p(y_{i}\|f_{i})}{df^{2}} = \frac{(v + 1)(y - f)}{v \sigma^{2} + (y_{i} - f_{i})^{2}}$$`
Initial commit, setting up the laplace approximation for a student t 2013-03-12 17:42:00 +00:00
Trying to 'debug' 2013-03-14 15:30:22 +00:00			`:y: data`
			`:f: latent variables f`
			`:returns: array which is diagonal of covariance matrix (second derivative of likelihood evaluated at points)`
Initial commit, setting up the laplace approximation for a student t 2013-03-12 17:42:00 +00:00			`"""`
Still working on rasmussen, link function needs vectorizing I think 2013-04-02 20:00:31 +01:00			`y = np.squeeze(y)`
			`f = np.squeeze(f)`
following naming convention better, lots of inverses which should be able to get rid of one or two, unsure if it works 2013-03-18 15:59:12 +00:00			`assert y.shape == f.shape`
Got most of laplace approximation working 2013-03-13 17:55:41 +00:00			`e = y - f`
Worked out in terms of W, needs gradients implementing 2013-03-19 18:21:57 +00:00			`hess = ((self.v + 1)(e2 - self.v(self.sigma2))) / ((((self.sigma2)self.v) + e2)*2)`
Still working on rasmussen, link function needs vectorizing I think 2013-04-02 20:00:31 +01:00			`return np.squeeze(hess)`
Just breaking some things... 2013-03-19 11:47:53 +00:00
			`def predictive_values(self, mu, var):`
			`"""`
Added predicted values for student t, works well 2013-04-02 12:33:01 +01:00			`Compute mean, and conficence interval (percentiles 5 and 95) of the prediction`
Just breaking some things... 2013-03-19 11:47:53 +00:00
Added predicted values for student t, works well 2013-04-02 12:33:01 +01:00			`Need to find what the variance is at the latent points for a student tnormal p(y\|f)p(f)`
			`(((g((v+1)/2))/(g(v/2)ssqrt(vpi)))(1+(1/v)*((y-f)/s)^2)^(-(v+1)/2))`
			`((1/(ssqrt(2pi)))exp(-(1/(2(s^2)))((y-f)^2)))`
Working laplace, just needs predictive values 2013-03-28 17:42:42 +00:00			`"""`
Added predicted values for student t, works well 2013-04-02 12:33:01 +01:00
			`#We want the variance around test points y which comes from int p(y\|f)p(f) df`
			`#Var(y) = Var(E[y\|f]) + E[Var(y\|f*)]`
			`#Since we are given f* (mu) which is our mean (expected) value of y\|f then the variance is the variance around this`
			`#Which was also given to us as (var)`
			`#We also need to know the expected variance of y* around samples f*, this is the variance of the student t distribution`
			`#However the variance of the student t distribution is not dependent on f, only on sigma and the degrees of freedom`
			`true_var = var + self.variance`

			`#Now we have an analytical solution for the variances of the distribution p(y\|f)p(f*) around our test points but we now`
			`#need the 95 and 5 percentiles.`
			`#FIXME: Hack, just pretend p(y\|f)p(f*) is a gaussian and use the gaussian's percentiles`
			`p_025 = mu - 2.*true_var`
			`p_975 = mu + 2.*true_var`

			`return mu, np.nan*mu, p_025, p_975`

			`def sample_predicted_values(self, mu, var):`
			`""" Experimental sample approches and numerical integration """`
Working laplace, just needs predictive values 2013-03-28 17:42:42 +00:00			`#p_025 = stats.t.ppf(.025, mu)`
			`#p_975 = stats.t.ppf(.975, mu)`

			`num_test_points = mu.shape[0]`
			`#Each mu is the latent point f* at the test point x*,`
			`#and the var is the gaussian variance at this point`
			`#Take lots of samples from this, so we have lots of possible values`
			`#for latent point f* for each test point x* weighted by how likely we were to pick it`
			`print "Taking %d samples of f*".format(num_test_points)`
			`num_f_samples = 10`
			`num_y_samples = 10`
			`student_t_means = np.random.normal(loc=mu, scale=np.sqrt(var), size=(num_test_points, num_f_samples))`
			`print "Student t means shape: ", student_t_means.shape`

			`#Now we have lots of f*, lets work out the likelihood of getting this by sampling`
			`#from a student t centred on this point, sample many points from this distribution`
			`#centred on f*`
			`#for test_point, f in enumerate(student_t_means):`
			`#print test_point`
			`#print f.shape`
			`#student_t_samples = stats.t.rvs(self.v, loc=f[:,None],`
			`#scale=self.sigma,`
			`#size=(num_f_samples, num_y_samples))`
			`#print student_t_samples.shape`

			`student_t_samples = stats.t.rvs(self.v, loc=student_t_means[:,None],`
			`scale=self.sigma,`
			`size=(num_test_points, num_y_samples, num_f_samples))`
			`student_t_samples = np.reshape(student_t_samples,`
			`(num_test_points, num_y_samples*num_f_samples))`

			`#Now take the 97.5 and 0.25 percentile of these points`
			`p_025 = stats.scoreatpercentile(student_t_samples, .025, axis=1)[:, None]`
			`p_975 = stats.scoreatpercentile(student_t_samples, .975, axis=1)[:, None]`

			`##Alernenately we could sample from int p(y\|f)p(f\|x) df`
			`def t_gaussian(f, mu, var):`
			`return (((gamma((self.v+1)0.5)) / (gamma(self.v0.5)self.sigmanp.sqrt(self.vnp.pi))) ((1+(1/self.v)(((mu-f)/self.sigma)2))(-(self.v+1)0.5))`
			`* ((1/(np.sqrt(2np.pivar)))np.exp(-(1/(2var)) ((mu-f)*2)))`
			`)`

			`def t_gauss_int(mu, var):`
			`print "Mu: ", mu`
			`print "var: ", var`
Added predicted values for student t, works well 2013-04-02 12:33:01 +01:00			`result = integrate.quad(t_gaussian, 0.025, 0.975, args=(mu, var))`
Working laplace, just needs predictive values 2013-03-28 17:42:42 +00:00			`print "Result: ", result`
			`return result[0]`

			`vec_t_gauss_int = np.vectorize(t_gauss_int)`

Added predicted values for student t, works well 2013-04-02 12:33:01 +01:00			`p = vec_t_gauss_int(mu, var)`
			`p_025 = mu - p`
			`p_975 = mu + p`
Working laplace, just needs predictive values 2013-03-28 17:42:42 +00:00			`import ipdb; ipdb.set_trace() ### XXX BREAKPOINT`