diff --git a/GPy/likelihoods/.DS_Store b/GPy/likelihoods/.DS_Store new file mode 100644 index 00000000..69fb4861 Binary files /dev/null and b/GPy/likelihoods/.DS_Store differ diff --git a/GPy/likelihoods/__init__.py b/GPy/likelihoods/__init__.py new file mode 100644 index 00000000..0708c763 --- /dev/null +++ b/GPy/likelihoods/__init__.py @@ -0,0 +1,6 @@ +from bernoulli import Bernoulli +from exponential import Exponential +from gaussian import Gaussian +from gamma import Gamma +from poisson import Poisson +from student_t import Student_t diff --git a/GPy/likelihoods/bernoulli.py b/GPy/likelihoods/bernoulli.py new file mode 100644 index 00000000..47737788 --- /dev/null +++ b/GPy/likelihoods/bernoulli.py @@ -0,0 +1,221 @@ +# Copyright (c) 2012, 2013 The GPy authors +# Licensed under the BSD 3-clause license (see LICENSE.txt) + +import numpy as np +from scipy import stats,special +import scipy as sp +from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf +import gp_transformations +from likelihood import Likelihood + +class Bernoulli(Likelihood): + """ + Bernoulli likelihood + + .. math:: + p(y_{i}|\\lambda(f_{i})) = \\lambda(f_{i})^{y_{i}}(1-f_{i})^{1-y_{i}} + + .. Note:: + Y is expected to take values in {-1,1} + Probit likelihood usually used + """ + def __init__(self,gp_link=None,analytical_mean=False,analytical_variance=False): + super(Bernoulli, self).__init__(gp_link,analytical_mean,analytical_variance) + if isinstance(gp_link , (gp_transformations.Heaviside, gp_transformations.Probit)): + self.log_concave = True + + def _preprocess_values(self,Y): + """ + Check if the values of the observations correspond to the values + assumed by the likelihood function. + + ..Note:: Binary classification algorithm works better with classes {-1,1} + """ + Y_prep = Y.copy() + Y1 = Y[Y.flatten()==1].size + Y2 = Y[Y.flatten()==0].size + assert Y1 + Y2 == Y.size, 'Bernoulli likelihood is meant to be used only with outputs in {0,1}.' + Y_prep[Y.flatten() == 0] = -1 + return Y_prep + + def _moments_match_analytical(self,data_i,tau_i,v_i): + """ + Moments match of the marginal approximation in EP algorithm + + :param i: number of observation (int) + :param tau_i: precision of the cavity distribution (float) + :param v_i: mean/variance of the cavity distribution (float) + """ + if data_i == 1: + sign = 1. + elif data_i == 0: + sign = -1 + else: + raise ValueError("bad value for Bernouilli observation (0,1)") + if isinstance(self.gp_link,gp_transformations.Probit): + z = sign*v_i/np.sqrt(tau_i**2 + tau_i) + Z_hat = std_norm_cdf(z) + phi = std_norm_pdf(z) + mu_hat = v_i/tau_i + sign*phi/(Z_hat*np.sqrt(tau_i**2 + tau_i)) + sigma2_hat = 1./tau_i - (phi/((tau_i**2+tau_i)*Z_hat))*(z+phi/Z_hat) + + elif isinstance(self.gp_link,gp_transformations.Heaviside): + a = sign*v_i/np.sqrt(tau_i) + Z_hat = std_norm_cdf(a) + N = std_norm_pdf(a) + mu_hat = v_i/tau_i + sign*N/Z_hat/np.sqrt(tau_i) + sigma2_hat = (1. - a*N/Z_hat - np.square(N/Z_hat))/tau_i + if np.any(np.isnan([Z_hat, mu_hat, sigma2_hat])): + stop + else: + raise ValueError("Exact moment matching not available for link {}".format(self.gp_link.gp_transformations.__name__)) + + return Z_hat, mu_hat, sigma2_hat + + def _predictive_mean_analytical(self,mu,variance): + + if isinstance(self.gp_link,gp_transformations.Probit): + return stats.norm.cdf(mu/np.sqrt(1+variance)) + + elif isinstance(self.gp_link,gp_transformations.Heaviside): + return stats.norm.cdf(mu/np.sqrt(variance)) + + else: + raise NotImplementedError + + def _predictive_variance_analytical(self,mu,variance, pred_mean): + + if isinstance(self.gp_link,gp_transformations.Heaviside): + return 0. + else: + raise NotImplementedError + + def pdf_link(self, link_f, y, extra_data=None): + """ + Likelihood function given link(f) + + .. math:: + p(y_{i}|\\lambda(f_{i})) = \\lambda(f_{i})^{y_{i}}(1-f_{i})^{1-y_{i}} + + :param link_f: latent variables link(f) + :type link_f: Nx1 array + :param y: data + :type y: Nx1 array + :param extra_data: extra_data not used in bernoulli + :returns: likelihood evaluated for this point + :rtype: float + + .. Note: + Each y_i must be in {0,1} + """ + assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape + objective = (link_f**y) * ((1.-link_f)**(1.-y)) + return np.exp(np.sum(np.log(objective))) + + def logpdf_link(self, link_f, y, extra_data=None): + """ + Log Likelihood function given link(f) + + .. math:: + \\ln p(y_{i}|\\lambda(f_{i})) = y_{i}\\log\\lambda(f_{i}) + (1-y_{i})\\log (1-f_{i}) + + :param link_f: latent variables link(f) + :type link_f: Nx1 array + :param y: data + :type y: Nx1 array + :param extra_data: extra_data not used in bernoulli + :returns: log likelihood evaluated at points link(f) + :rtype: float + """ + assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape + #objective = y*np.log(link_f) + (1.-y)*np.log(link_f) + objective = np.where(y==1, np.log(link_f), np.log(1-link_f)) + return np.sum(objective) + + def dlogpdf_dlink(self, link_f, y, extra_data=None): + """ + Gradient of the pdf at y, given link(f) w.r.t link(f) + + .. math:: + \\frac{d\\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)} = \\frac{y_{i}}{\\lambda(f_{i})} - \\frac{(1 - y_{i})}{(1 - \\lambda(f_{i}))} + + :param link_f: latent variables link(f) + :type link_f: Nx1 array + :param y: data + :type y: Nx1 array + :param extra_data: extra_data not used in bernoulli + :returns: gradient of log likelihood evaluated at points link(f) + :rtype: Nx1 array + """ + assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape + grad = (y/link_f) - (1.-y)/(1-link_f) + return grad + + def d2logpdf_dlink2(self, link_f, y, extra_data=None): + """ + Hessian at y, given link_f, w.r.t link_f the hessian will be 0 unless i == j + i.e. second derivative logpdf at y given link(f_i) link(f_j) w.r.t link(f_i) and link(f_j) + + + .. math:: + \\frac{d^{2}\\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)^{2}} = \\frac{-y_{i}}{\\lambda(f)^{2}} - \\frac{(1-y_{i})}{(1-\\lambda(f))^{2}} + + :param link_f: latent variables link(f) + :type link_f: Nx1 array + :param y: data + :type y: Nx1 array + :param extra_data: extra_data not used in bernoulli + :returns: Diagonal of log hessian matrix (second derivative of log likelihood evaluated at points link(f)) + :rtype: Nx1 array + + .. Note:: + Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases + (the distribution for y_i depends only on link(f_i) not on link(f_(j!=i)) + """ + assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape + d2logpdf_dlink2 = -y/(link_f**2) - (1-y)/((1-link_f)**2) + return d2logpdf_dlink2 + + def d3logpdf_dlink3(self, link_f, y, extra_data=None): + """ + Third order derivative log-likelihood function at y given link(f) w.r.t link(f) + + .. math:: + \\frac{d^{3} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{3}\\lambda(f)} = \\frac{2y_{i}}{\\lambda(f)^{3}} - \\frac{2(1-y_{i}}{(1-\\lambda(f))^{3}} + + :param link_f: latent variables link(f) + :type link_f: Nx1 array + :param y: data + :type y: Nx1 array + :param extra_data: extra_data not used in bernoulli + :returns: third derivative of log likelihood evaluated at points link(f) + :rtype: Nx1 array + """ + assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape + d3logpdf_dlink3 = 2*(y/(link_f**3) - (1-y)/((1-link_f)**3)) + return d3logpdf_dlink3 + + def _mean(self,gp): + """ + Mass (or density) function + """ + return self.gp_link.transf(gp) + + def _variance(self,gp): + """ + Mass (or density) function + """ + p = self.gp_link.transf(gp) + return p*(1.-p) + + def samples(self, gp): + """ + Returns a set of samples of observations based on a given value of the latent variable. + + :param gp: latent variable + """ + orig_shape = gp.shape + gp = gp.flatten() + ns = np.ones_like(gp, dtype=int) + Ysim = np.random.binomial(ns, self.gp_link.transf(gp)) + return Ysim.reshape(orig_shape) diff --git a/GPy/likelihoods/exponential.py b/GPy/likelihoods/exponential.py new file mode 100644 index 00000000..16f3456d --- /dev/null +++ b/GPy/likelihoods/exponential.py @@ -0,0 +1,156 @@ +# Copyright (c) 2012, 2013 GPy Authors +# Licensed under the BSD 3-clause license (see LICENSE.txt) + + +import numpy as np +from scipy import stats,special +import scipy as sp +from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf +import gp_transformations +from likelihood import Likelihood + +class Exponential(NoiseDistribution): + """ + Expoential likelihood + Y is expected to take values in {0,1,2,...} + ----- + $$ + L(x) = \exp(\lambda) * \lambda**Y_i / Y_i! + $$ + """ + def __init__(self,gp_link=None,analytical_mean=False,analytical_variance=False): + super(Exponential, self).__init__(gp_link,analytical_mean,analytical_variance) + + def _preprocess_values(self,Y): + return Y + + def pdf_link(self, link_f, y, extra_data=None): + """ + Likelihood function given link(f) + + .. math:: + p(y_{i}|\\lambda(f_{i})) = \\lambda(f_{i})\\exp (-y\\lambda(f_{i})) + + :param link_f: latent variables link(f) + :type link_f: Nx1 array + :param y: data + :type y: Nx1 array + :param extra_data: extra_data which is not used in exponential distribution + :returns: likelihood evaluated for this point + :rtype: float + """ + assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape + log_objective = link_f*np.exp(-y*link_f) + return np.exp(np.sum(np.log(log_objective))) + #return np.exp(np.sum(-y/link_f - np.log(link_f) )) + + def logpdf_link(self, link_f, y, extra_data=None): + """ + Log Likelihood Function given link(f) + + .. math:: + \\ln p(y_{i}|\lambda(f_{i})) = \\ln \\lambda(f_{i}) - y_{i}\\lambda(f_{i}) + + :param link_f: latent variables (link(f)) + :type link_f: Nx1 array + :param y: data + :type y: Nx1 array + :param extra_data: extra_data which is not used in exponential distribution + :returns: likelihood evaluated for this point + :rtype: float + + """ + assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape + log_objective = np.log(link_f) - y*link_f + #logpdf_link = np.sum(-np.log(link_f) - y/link_f) + return np.sum(log_objective) + + def dlogpdf_dlink(self, link_f, y, extra_data=None): + """ + Gradient of the log likelihood function at y, given link(f) w.r.t link(f) + + .. math:: + \\frac{d \\ln p(y_{i}|\lambda(f_{i}))}{d\\lambda(f)} = \\frac{1}{\\lambda(f)} - y_{i} + + :param link_f: latent variables (f) + :type link_f: Nx1 array + :param y: data + :type y: Nx1 array + :param extra_data: extra_data which is not used in exponential distribution + :returns: gradient of likelihood evaluated at points + :rtype: Nx1 array + + """ + assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape + grad = 1./link_f - y + #grad = y/(link_f**2) - 1./link_f + return grad + + def d2logpdf_dlink2(self, link_f, y, extra_data=None): + """ + Hessian at y, given link(f), w.r.t link(f) + i.e. second derivative logpdf at y given link(f_i) and link(f_j) w.r.t link(f_i) and link(f_j) + The hessian will be 0 unless i == j + + .. math:: + \\frac{d^{2} \\ln p(y_{i}|\lambda(f_{i}))}{d^{2}\\lambda(f)} = -\\frac{1}{\\lambda(f_{i})^{2}} + + :param link_f: latent variables link(f) + :type link_f: Nx1 array + :param y: data + :type y: Nx1 array + :param extra_data: extra_data which is not used in exponential distribution + :returns: Diagonal of hessian matrix (second derivative of likelihood evaluated at points f) + :rtype: Nx1 array + + .. Note:: + Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases + (the distribution for y_i depends only on link(f_i) not on link(f_(j!=i)) + """ + assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape + hess = -1./(link_f**2) + #hess = -2*y/(link_f**3) + 1/(link_f**2) + return hess + + def d3logpdf_dlink3(self, link_f, y, extra_data=None): + """ + Third order derivative log-likelihood function at y given link(f) w.r.t link(f) + + .. math:: + \\frac{d^{3} \\ln p(y_{i}|\lambda(f_{i}))}{d^{3}\\lambda(f)} = \\frac{2}{\\lambda(f_{i})^{3}} + + :param link_f: latent variables link(f) + :type link_f: Nx1 array + :param y: data + :type y: Nx1 array + :param extra_data: extra_data which is not used in exponential distribution + :returns: third derivative of likelihood evaluated at points f + :rtype: Nx1 array + """ + assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape + d3lik_dlink3 = 2./(link_f**3) + #d3lik_dlink3 = 6*y/(link_f**4) - 2./(link_f**3) + return d3lik_dlink3 + + def _mean(self,gp): + """ + Mass (or density) function + """ + return self.gp_link.transf(gp) + + def _variance(self,gp): + """ + Mass (or density) function + """ + return self.gp_link.transf(gp)**2 + + def samples(self, gp): + """ + Returns a set of samples of observations based on a given value of the latent variable. + + :param gp: latent variable + """ + orig_shape = gp.shape + gp = gp.flatten() + Ysim = np.random.exponential(1.0/self.gp_link.transf(gp)) + return Ysim.reshape(orig_shape) diff --git a/GPy/likelihoods/gamma.py b/GPy/likelihoods/gamma.py new file mode 100644 index 00000000..a28130ac --- /dev/null +++ b/GPy/likelihoods/gamma.py @@ -0,0 +1,155 @@ +# Copyright (c) 2012, 2013 Ricardo Andrade +# Licensed under the BSD 3-clause license (see LICENSE.txt) + + +import numpy as np +from scipy import stats,special +import scipy as sp +from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf +import gp_transformations +from likelihood import Likelihood + +class Gamma(NoiseDistribution): + """ + Gamma likelihood + + .. math:: + p(y_{i}|\\lambda(f_{i})) = \\frac{\\beta^{\\alpha_{i}}}{\\Gamma(\\alpha_{i})}y_{i}^{\\alpha_{i}-1}e^{-\\beta y_{i}}\\\\ + \\alpha_{i} = \\beta y_{i} + + """ + def __init__(self,gp_link=None,analytical_mean=False,analytical_variance=False,beta=1.): + self.beta = beta + super(Gamma, self).__init__(gp_link,analytical_mean,analytical_variance) + + def _preprocess_values(self,Y): + return Y + + def pdf_link(self, link_f, y, extra_data=None): + """ + Likelihood function given link(f) + + .. math:: + p(y_{i}|\\lambda(f_{i})) = \\frac{\\beta^{\\alpha_{i}}}{\\Gamma(\\alpha_{i})}y_{i}^{\\alpha_{i}-1}e^{-\\beta y_{i}}\\\\ + \\alpha_{i} = \\beta y_{i} + + :param link_f: latent variables link(f) + :type link_f: Nx1 array + :param y: data + :type y: Nx1 array + :param extra_data: extra_data which is not used in poisson distribution + :returns: likelihood evaluated for this point + :rtype: float + """ + assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape + #return stats.gamma.pdf(obs,a = self.gp_link.transf(gp)/self.variance,scale=self.variance) + alpha = link_f*self.beta + objective = (y**(alpha - 1.) * np.exp(-self.beta*y) * self.beta**alpha)/ special.gamma(alpha) + return np.exp(np.sum(np.log(objective))) + + def logpdf_link(self, link_f, y, extra_data=None): + """ + Log Likelihood Function given link(f) + + .. math:: + \\ln p(y_{i}|\lambda(f_{i})) = \\alpha_{i}\\log \\beta - \\log \\Gamma(\\alpha_{i}) + (\\alpha_{i} - 1)\\log y_{i} - \\beta y_{i}\\\\ + \\alpha_{i} = \\beta y_{i} + + :param link_f: latent variables (link(f)) + :type link_f: Nx1 array + :param y: data + :type y: Nx1 array + :param extra_data: extra_data which is not used in poisson distribution + :returns: likelihood evaluated for this point + :rtype: float + + """ + assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape + #alpha = self.gp_link.transf(gp)*self.beta + #return (1. - alpha)*np.log(obs) + self.beta*obs - alpha * np.log(self.beta) + np.log(special.gamma(alpha)) + alpha = link_f*self.beta + log_objective = alpha*np.log(self.beta) - np.log(special.gamma(alpha)) + (alpha - 1)*np.log(y) - self.beta*y + return np.sum(log_objective) + + def dlogpdf_dlink(self, link_f, y, extra_data=None): + """ + Gradient of the log likelihood function at y, given link(f) w.r.t link(f) + + .. math:: + \\frac{d \\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)} = \\beta (\\log \\beta y_{i}) - \\Psi(\\alpha_{i})\\beta\\\\ + \\alpha_{i} = \\beta y_{i} + + :param link_f: latent variables (f) + :type link_f: Nx1 array + :param y: data + :type y: Nx1 array + :param extra_data: extra_data which is not used in gamma distribution + :returns: gradient of likelihood evaluated at points + :rtype: Nx1 array + + """ + assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape + grad = self.beta*np.log(self.beta*y) - special.psi(self.beta*link_f)*self.beta + #old + #return -self.gp_link.dtransf_df(gp)*self.beta*np.log(obs) + special.psi(self.gp_link.transf(gp)*self.beta) * self.gp_link.dtransf_df(gp)*self.beta + return grad + + def d2logpdf_dlink2(self, link_f, y, extra_data=None): + """ + Hessian at y, given link(f), w.r.t link(f) + i.e. second derivative logpdf at y given link(f_i) and link(f_j) w.r.t link(f_i) and link(f_j) + The hessian will be 0 unless i == j + + .. math:: + \\frac{d^{2} \\ln p(y_{i}|\lambda(f_{i}))}{d^{2}\\lambda(f)} = -\\beta^{2}\\frac{d\\Psi(\\alpha_{i})}{d\\alpha_{i}}\\\\ + \\alpha_{i} = \\beta y_{i} + + :param link_f: latent variables link(f) + :type link_f: Nx1 array + :param y: data + :type y: Nx1 array + :param extra_data: extra_data which is not used in gamma distribution + :returns: Diagonal of hessian matrix (second derivative of likelihood evaluated at points f) + :rtype: Nx1 array + + .. Note:: + Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases + (the distribution for y_i depends only on link(f_i) not on link(f_(j!=i)) + """ + assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape + hess = -special.polygamma(1, self.beta*link_f)*(self.beta**2) + #old + #return -self.gp_link.d2transf_df2(gp)*self.beta*np.log(obs) + special.polygamma(1,self.gp_link.transf(gp)*self.beta)*(self.gp_link.dtransf_df(gp)*self.beta)**2 + special.psi(self.gp_link.transf(gp)*self.beta)*self.gp_link.d2transf_df2(gp)*self.beta + return hess + + def d3logpdf_dlink3(self, link_f, y, extra_data=None): + """ + Third order derivative log-likelihood function at y given link(f) w.r.t link(f) + + .. math:: + \\frac{d^{3} \\ln p(y_{i}|\lambda(f_{i}))}{d^{3}\\lambda(f)} = -\\beta^{3}\\frac{d^{2}\\Psi(\\alpha_{i})}{d\\alpha_{i}}\\\\ + \\alpha_{i} = \\beta y_{i} + + :param link_f: latent variables link(f) + :type link_f: Nx1 array + :param y: data + :type y: Nx1 array + :param extra_data: extra_data which is not used in gamma distribution + :returns: third derivative of likelihood evaluated at points f + :rtype: Nx1 array + """ + assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape + d3lik_dlink3 = -special.polygamma(2, self.beta*link_f)*(self.beta**3) + return d3lik_dlink3 + + def _mean(self,gp): + """ + Mass (or density) function + """ + return self.gp_link.transf(gp) + + def _variance(self,gp): + """ + Mass (or density) function + """ + return self.gp_link.transf(gp)/self.beta diff --git a/GPy/likelihoods/gaussian.py b/GPy/likelihoods/gaussian.py new file mode 100644 index 00000000..0aa6c42b --- /dev/null +++ b/GPy/likelihoods/gaussian.py @@ -0,0 +1,300 @@ +# Copyright (c) 2012, 2013 Ricardo Andrade +# Licensed under the BSD 3-clause license (see LICENSE.txt) + +import numpy as np +from scipy import stats,special +import scipy as sp +from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf +import gp_transformations +from likelihood import Likelihood + +class Gaussian(NoiseDistribution): + """ + Gaussian likelihood + + .. math:: + \\ln p(y_{i}|\\lambda(f_{i})) = -\\frac{N \\ln 2\\pi}{2} - \\frac{\\ln |K|}{2} - \\frac{(y_{i} - \\lambda(f_{i}))^{T}\\sigma^{-2}(y_{i} - \\lambda(f_{i}))}{2} + + :param variance: variance value of the Gaussian distribution + :param N: Number of data points + :type N: int + """ + def __init__(self,gp_link=None,analytical_mean=False,analytical_variance=False,variance=1., D=None, N=None): + self.variance = variance + self.N = N + self._set_params(np.asarray(variance)) + super(Gaussian, self).__init__(gp_link,analytical_mean,analytical_variance) + if isinstance(gp_link , gp_transformations.Identity): + self.log_concave = True + + def _get_params(self): + return np.array([self.variance]) + + def _get_param_names(self): + return ['noise_model_variance'] + + def _set_params(self, p): + self.variance = float(p) + self.I = np.eye(self.N) + self.covariance_matrix = self.I * self.variance + self.Ki = self.I*(1.0 / self.variance) + #self.ln_det_K = np.sum(np.log(np.diag(self.covariance_matrix))) + self.ln_det_K = self.N*np.log(self.variance) + + def _gradients(self,partial): + return np.zeros(1) + #return np.sum(partial) + + def _preprocess_values(self,Y): + """ + Check if the values of the observations correspond to the values + assumed by the likelihood function. + """ + return Y + + def _moments_match_analytical(self,data_i,tau_i,v_i): + """ + Moments match of the marginal approximation in EP algorithm + + :param i: number of observation (int) + :param tau_i: precision of the cavity distribution (float) + :param v_i: mean/variance of the cavity distribution (float) + """ + sigma2_hat = 1./(1./self.variance + tau_i) + mu_hat = sigma2_hat*(data_i/self.variance + v_i) + sum_var = self.variance + 1./tau_i + Z_hat = 1./np.sqrt(2.*np.pi*sum_var)*np.exp(-.5*(data_i - v_i/tau_i)**2./sum_var) + return Z_hat, mu_hat, sigma2_hat + + def _predictive_mean_analytical(self,mu,sigma): + new_sigma2 = self.predictive_variance(mu,sigma) + return new_sigma2*(mu/sigma**2 + self.gp_link.transf(mu)/self.variance) + + def _predictive_variance_analytical(self,mu,sigma,predictive_mean=None): + return 1./(1./self.variance + 1./sigma**2) + + def _mass(self, link_f, y, extra_data=None): + NotImplementedError("Deprecated, now doing chain in noise_model.py for link function evaluation\ + Please negate your function and use pdf in noise_model.py, if implementing a likelihood\ + rederivate the derivative without doing the chain and put in logpdf, dlogpdf_dlink or\ + its derivatives") + def _nlog_mass(self, link_f, y, extra_data=None): + NotImplementedError("Deprecated, now doing chain in noise_model.py for link function evaluation\ + Please negate your function and use logpdf in noise_model.py, if implementing a likelihood\ + rederivate the derivative without doing the chain and put in logpdf, dlogpdf_dlink or\ + its derivatives") + + def _dnlog_mass_dgp(self, link_f, y, extra_data=None): + NotImplementedError("Deprecated, now doing chain in noise_model.py for link function evaluation\ + Please negate your function and use dlogpdf_df in noise_model.py, if implementing a likelihood\ + rederivate the derivative without doing the chain and put in logpdf, dlogpdf_dlink or\ + its derivatives") + + def _d2nlog_mass_dgp2(self, link_f, y, extra_data=None): + NotImplementedError("Deprecated, now doing chain in noise_model.py for link function evaluation\ + Please negate your function and use d2logpdf_df2 in noise_model.py, if implementing a likelihood\ + rederivate the derivative without doing the chain and put in logpdf, dlogpdf_dlink or\ + its derivatives") + + def pdf_link(self, link_f, y, extra_data=None): + """ + Likelihood function given link(f) + + .. math:: + \\ln p(y_{i}|\\lambda(f_{i})) = -\\frac{N \\ln 2\\pi}{2} - \\frac{\\ln |K|}{2} - \\frac{(y_{i} - \\lambda(f_{i}))^{T}\\sigma^{-2}(y_{i} - \\lambda(f_{i}))}{2} + + :param link_f: latent variables link(f) + :type link_f: Nx1 array + :param y: data + :type y: Nx1 array + :param extra_data: extra_data not used in gaussian + :returns: likelihood evaluated for this point + :rtype: float + """ + #Assumes no covariance, exp, sum, log for numerical stability + return np.exp(np.sum(np.log(stats.norm.pdf(y, link_f, np.sqrt(self.variance))))) + + def logpdf_link(self, link_f, y, extra_data=None): + """ + Log likelihood function given link(f) + + .. math:: + \\ln p(y_{i}|\\lambda(f_{i})) = -\\frac{N \\ln 2\\pi}{2} - \\frac{\\ln |K|}{2} - \\frac{(y_{i} - \\lambda(f_{i}))^{T}\\sigma^{-2}(y_{i} - \\lambda(f_{i}))}{2} + + :param link_f: latent variables link(f) + :type link_f: Nx1 array + :param y: data + :type y: Nx1 array + :param extra_data: extra_data not used in gaussian + :returns: log likelihood evaluated for this point + :rtype: float + """ + assert np.asarray(link_f).shape == np.asarray(y).shape + return -0.5*(np.sum((y-link_f)**2/self.variance) + self.ln_det_K + self.N*np.log(2.*np.pi)) + + def dlogpdf_dlink(self, link_f, y, extra_data=None): + """ + Gradient of the pdf at y, given link(f) w.r.t link(f) + + .. math:: + \\frac{d \\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)} = \\frac{1}{\\sigma^{2}}(y_{i} - \\lambda(f_{i})) + + :param link_f: latent variables link(f) + :type link_f: Nx1 array + :param y: data + :type y: Nx1 array + :param extra_data: extra_data not used in gaussian + :returns: gradient of log likelihood evaluated at points link(f) + :rtype: Nx1 array + """ + assert np.asarray(link_f).shape == np.asarray(y).shape + s2_i = (1.0/self.variance) + grad = s2_i*y - s2_i*link_f + return grad + + def d2logpdf_dlink2(self, link_f, y, extra_data=None): + """ + Hessian at y, given link_f, w.r.t link_f. + i.e. second derivative logpdf at y given link(f_i) link(f_j) w.r.t link(f_i) and link(f_j) + + The hessian will be 0 unless i == j + + .. math:: + \\frac{d^{2} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{2}f} = -\\frac{1}{\\sigma^{2}} + + :param link_f: latent variables link(f) + :type link_f: Nx1 array + :param y: data + :type y: Nx1 array + :param extra_data: extra_data not used in gaussian + :returns: Diagonal of log hessian matrix (second derivative of log likelihood evaluated at points link(f)) + :rtype: Nx1 array + + .. Note:: + Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases + (the distribution for y_i depends only on link(f_i) not on link(f_(j!=i)) + """ + assert np.asarray(link_f).shape == np.asarray(y).shape + hess = -(1.0/self.variance)*np.ones((self.N, 1)) + return hess + + def d3logpdf_dlink3(self, link_f, y, extra_data=None): + """ + Third order derivative log-likelihood function at y given link(f) w.r.t link(f) + + .. math:: + \\frac{d^{3} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{3}\\lambda(f)} = 0 + + :param link_f: latent variables link(f) + :type link_f: Nx1 array + :param y: data + :type y: Nx1 array + :param extra_data: extra_data not used in gaussian + :returns: third derivative of log likelihood evaluated at points link(f) + :rtype: Nx1 array + """ + assert np.asarray(link_f).shape == np.asarray(y).shape + d3logpdf_dlink3 = np.diagonal(0*self.I)[:, None] + return d3logpdf_dlink3 + + def dlogpdf_link_dvar(self, link_f, y, extra_data=None): + """ + Gradient of the log-likelihood function at y given link(f), w.r.t variance parameter (noise_variance) + + .. math:: + \\frac{d \\ln p(y_{i}|\\lambda(f_{i}))}{d\\sigma^{2}} = -\\frac{N}{2\\sigma^{2}} + \\frac{(y_{i} - \\lambda(f_{i}))^{2}}{2\\sigma^{4}} + + :param link_f: latent variables link(f) + :type link_f: Nx1 array + :param y: data + :type y: Nx1 array + :param extra_data: extra_data not used in gaussian + :returns: derivative of log likelihood evaluated at points link(f) w.r.t variance parameter + :rtype: float + """ + assert np.asarray(link_f).shape == np.asarray(y).shape + e = y - link_f + s_4 = 1.0/(self.variance**2) + dlik_dsigma = -0.5*self.N/self.variance + 0.5*s_4*np.sum(np.square(e)) + return np.sum(dlik_dsigma) # Sure about this sum? + + def dlogpdf_dlink_dvar(self, link_f, y, extra_data=None): + """ + Derivative of the dlogpdf_dlink w.r.t variance parameter (noise_variance) + + .. math:: + \\frac{d}{d\\sigma^{2}}(\\frac{d \\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)}) = \\frac{1}{\\sigma^{4}}(-y_{i} + \\lambda(f_{i})) + + :param link_f: latent variables link(f) + :type link_f: Nx1 array + :param y: data + :type y: Nx1 array + :param extra_data: extra_data not used in gaussian + :returns: derivative of log likelihood evaluated at points link(f) w.r.t variance parameter + :rtype: Nx1 array + """ + assert np.asarray(link_f).shape == np.asarray(y).shape + s_4 = 1.0/(self.variance**2) + dlik_grad_dsigma = -s_4*y + s_4*link_f + return dlik_grad_dsigma + + def d2logpdf_dlink2_dvar(self, link_f, y, extra_data=None): + """ + Gradient of the hessian (d2logpdf_dlink2) w.r.t variance parameter (noise_variance) + + .. math:: + \\frac{d}{d\\sigma^{2}}(\\frac{d^{2} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{2}\\lambda(f)}) = \\frac{1}{\\sigma^{4}} + + :param link_f: latent variables link(f) + :type link_f: Nx1 array + :param y: data + :type y: Nx1 array + :param extra_data: extra_data not used in gaussian + :returns: derivative of log hessian evaluated at points link(f_i) and link(f_j) w.r.t variance parameter + :rtype: Nx1 array + """ + assert np.asarray(link_f).shape == np.asarray(y).shape + s_4 = 1.0/(self.variance**2) + d2logpdf_dlink2_dvar = np.diag(s_4*self.I)[:, None] + return d2logpdf_dlink2_dvar + + def dlogpdf_link_dtheta(self, f, y, extra_data=None): + dlogpdf_dvar = self.dlogpdf_link_dvar(f, y, extra_data=extra_data) + return np.asarray([[dlogpdf_dvar]]) + + def dlogpdf_dlink_dtheta(self, f, y, extra_data=None): + dlogpdf_dlink_dvar = self.dlogpdf_dlink_dvar(f, y, extra_data=extra_data) + return dlogpdf_dlink_dvar + + def d2logpdf_dlink2_dtheta(self, f, y, extra_data=None): + d2logpdf_dlink2_dvar = self.d2logpdf_dlink2_dvar(f, y, extra_data=extra_data) + return d2logpdf_dlink2_dvar + + def _mean(self,gp): + """ + Expected value of y under the Mass (or density) function p(y|f) + + .. math:: + E_{p(y|f)}[y] + """ + return self.gp_link.transf(gp) + + def _variance(self,gp): + """ + Variance of y under the Mass (or density) function p(y|f) + + .. math:: + Var_{p(y|f)}[y] + """ + return self.variance + + def samples(self, gp): + """ + Returns a set of samples of observations based on a given value of the latent variable. + + :param gp: latent variable + """ + orig_shape = gp.shape + gp = gp.flatten() + Ysim = np.array([np.random.normal(self.gp_link.transf(gpj), scale=np.sqrt(self.variance), size=1) for gpj in gp]) + return Ysim.reshape(orig_shape) diff --git a/GPy/likelihoods/likelihood.py b/GPy/likelihoods/likelihood.py new file mode 100644 index 00000000..ffb6e60d --- /dev/null +++ b/GPy/likelihoods/likelihood.py @@ -0,0 +1,437 @@ +# Copyright (c) 2012, 2013 Ricardo Andrade +# Licensed under the BSD 3-clause license (see LICENSE.txt) + + +import numpy as np +from scipy import stats,special +import scipy as sp +import pylab as pb +from GPy.util.plot import gpplot +from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf +import gp_transformations +from GPy.util.misc import chain_1, chain_2, chain_3 +from scipy.integrate import quad +import warnings + +class Likelihood(object): + """ + Likelihood base class + + To use this class, inherrit and define missing functionality. + + The minimum required funciotnality is... TODO + """ + def __init__(self,gp_link,analytical_mean=False,analytical_variance=False): + assert isinstance(gp_link,gp_transformations.GPTransformation), "gp_link is not a valid GPTransformation." + self.gp_link = gp_link + self.analytical_mean = analytical_mean + self.analytical_variance = analytical_variance + if self.analytical_mean: + self.moments_match = self._moments_match_analytical + self.predictive_mean = self._predictive_mean_analytical + else: + self.moments_match = self._moments_match_numerical + self.predictive_mean = self._predictive_mean_numerical + if self.analytical_variance: + self.predictive_variance = self._predictive_variance_analytical + else: + self.predictive_variance = self._predictive_variance_numerical + + self.log_concave = False + + def _get_params(self): + return np.zeros(0) + + def _get_param_names(self): + return [] + + def _set_params(self,p): + pass + + def _gradients(self,partial): + return np.zeros(0) + + def _preprocess_values(self,Y): + """ + In case it is needed, this function assess the output values or makes any pertinent transformation on them. + + :param Y: observed output + :type Y: Nx1 numpy.darray + + """ + return Y + + def _moments_match_analytical(self,obs,tau,v): + """ + If available, this function computes the moments analytically. + """ + raise NotImplementedError + + def log_predictive_density(self, y_test, mu_star, var_star): + """ + Calculation of the log predictive density + + .. math: + p(y_{*}|D) = p(y_{*}|f_{*})p(f_{*}|\mu_{*}\\sigma^{2}_{*}) + + :param y_test: test observations (y_{*}) + :type y_test: (Nx1) array + :param mu_star: predictive mean of gaussian p(f_{*}|mu_{*}, var_{*}) + :type mu_star: (Nx1) array + :param var_star: predictive variance of gaussian p(f_{*}|mu_{*}, var_{*}) + :type var_star: (Nx1) array + """ + assert y_test.shape==mu_star.shape + assert y_test.shape==var_star.shape + assert y_test.shape[1] == 1 + def integral_generator(y, m, v): + """Generate a function which can be integrated to give p(Y*|Y) = int p(Y*|f*)p(f*|Y) df*""" + def f(f_star): + return self.pdf(f_star, y)*np.exp(-(1./(2*v))*np.square(m-f_star)) + return f + + scaled_p_ystar, accuracy = zip(*[quad(integral_generator(y, m, v), -np.inf, np.inf) for y, m, v in zip(y_test.flatten(), mu_star.flatten(), var_star.flatten())]) + scaled_p_ystar = np.array(scaled_p_ystar).reshape(-1,1) + p_ystar = scaled_p_ystar/np.sqrt(2*np.pi*var_star) + return np.log(p_ystar) + + def _moments_match_numerical(self,obs,tau,v): + """ + Calculation of moments using quadrature + + :param obs: observed output + :param tau: cavity distribution 1st natural parameter (precision) + :param v: cavity distribution 2nd natural paramenter (mu*precision) + """ + #Compute first integral for zeroth moment. + #NOTE constant np.sqrt(2*pi/tau) added at the end of the function + mu = v/tau + def int_1(f): + return self.pdf(f, obs)*np.exp(-0.5*tau*np.square(mu-f)) + z_scaled, accuracy = quad(int_1, -np.inf, np.inf) + + #Compute second integral for first moment + def int_2(f): + return f*self.pdf(f, obs)*np.exp(-0.5*tau*np.square(mu-f)) + mean, accuracy = quad(int_2, -np.inf, np.inf) + mean /= z_scaled + + #Compute integral for variance + def int_3(f): + return (f**2)*self.pdf(f, obs)*np.exp(-0.5*tau*np.square(mu-f)) + Ef2, accuracy = quad(int_3, -np.inf, np.inf) + Ef2 /= z_scaled + variance = Ef2 - mean**2 + + #Add constant to the zeroth moment + #NOTE: this constant is not needed in the other moments because it cancells out. + z = z_scaled/np.sqrt(2*np.pi/tau) + + return z, mean, variance + + def _predictive_mean_analytical(self,mu,sigma): + """ + Predictive mean + .. math:: + E(Y^{*}|Y) = E( E(Y^{*}|f^{*}, Y) ) + + If available, this function computes the predictive mean analytically. + """ + raise NotImplementedError + + def _predictive_variance_analytical(self,mu,sigma): + """ + Predictive variance + .. math:: + V(Y^{*}| Y) = E( V(Y^{*}|f^{*}, Y) ) + V( E(Y^{*}|f^{*}, Y) ) + + If available, this function computes the predictive variance analytically. + """ + raise NotImplementedError + + def _predictive_mean_numerical(self,mu,variance): + """ + Quadrature calculation of the predictive mean: E(Y_star|Y) = E( E(Y_star|f_star, Y) ) + + :param mu: mean of posterior + :param sigma: standard deviation of posterior + + """ + def int_mean(f,m,v): + return self._mean(f)*np.exp(-(0.5/v)*np.square(f - m)) + scaled_mean = [quad(int_mean, -np.inf, np.inf,args=(mj,s2j))[0] for mj,s2j in zip(mu,variance)] + mean = np.array(scaled_mean)[:,None] / np.sqrt(2*np.pi*(variance)) + + return mean + + def _predictive_variance_numerical(self,mu,variance,predictive_mean=None): + """ + Numerical approximation to the predictive variance: V(Y_star) + + The following variance decomposition is used: + V(Y_star) = E( V(Y_star|f_star) ) + V( E(Y_star|f_star) ) + + :param mu: mean of posterior + :param sigma: standard deviation of posterior + :predictive_mean: output's predictive mean, if None _predictive_mean function will be called. + + """ + #sigma2 = sigma**2 + normalizer = np.sqrt(2*np.pi*variance) + + # E( V(Y_star|f_star) ) + def int_var(f,m,v): + return self._variance(f)*np.exp(-(0.5/v)*np.square(f - m)) + scaled_exp_variance = [quad(int_var, -np.inf, np.inf,args=(mj,s2j))[0] for mj,s2j in zip(mu,variance)] + exp_var = np.array(scaled_exp_variance)[:,None] / normalizer + + #V( E(Y_star|f_star) ) = E( E(Y_star|f_star)**2 ) - E( E(Y_star|f_star) )**2 + + #E( E(Y_star|f_star) )**2 + if predictive_mean is None: + predictive_mean = self.predictive_mean(mu,variance) + predictive_mean_sq = predictive_mean**2 + + #E( E(Y_star|f_star)**2 ) + def int_pred_mean_sq(f,m,v,predictive_mean_sq): + return self._mean(f)**2*np.exp(-(0.5/v)*np.square(f - m)) + scaled_exp_exp2 = [quad(int_pred_mean_sq, -np.inf, np.inf,args=(mj,s2j,pm2j))[0] for mj,s2j,pm2j in zip(mu,variance,predictive_mean_sq)] + exp_exp2 = np.array(scaled_exp_exp2)[:,None] / normalizer + + var_exp = exp_exp2 - predictive_mean_sq + + # V(Y_star) = E( V(Y_star|f_star) ) + V( E(Y_star|f_star) ) + return exp_var + var_exp + + def pdf_link(self, link_f, y, extra_data=None): + raise NotImplementedError + + def logpdf_link(self, link_f, y, extra_data=None): + raise NotImplementedError + + def dlogpdf_dlink(self, link_f, y, extra_data=None): + raise NotImplementedError + + def d2logpdf_dlink2(self, link_f, y, extra_data=None): + raise NotImplementedError + + def d3logpdf_dlink3(self, link_f, y, extra_data=None): + raise NotImplementedError + + def dlogpdf_link_dtheta(self, link_f, y, extra_data=None): + raise NotImplementedError + + def dlogpdf_dlink_dtheta(self, link_f, y, extra_data=None): + raise NotImplementedError + + def d2logpdf_dlink2_dtheta(self, link_f, y, extra_data=None): + raise NotImplementedError + + def pdf(self, f, y, extra_data=None): + """ + Evaluates the link function link(f) then computes the likelihood (pdf) using it + + .. math: + p(y|\\lambda(f)) + + :param f: latent variables f + :type f: Nx1 array + :param y: data + :type y: Nx1 array + :param extra_data: extra_data which is not used in student t distribution - not used + :returns: likelihood evaluated for this point + :rtype: float + """ + link_f = self.gp_link.transf(f) + return self.pdf_link(link_f, y, extra_data=extra_data) + + def logpdf(self, f, y, extra_data=None): + """ + Evaluates the link function link(f) then computes the log likelihood (log pdf) using it + + .. math: + \\log p(y|\\lambda(f)) + + :param f: latent variables f + :type f: Nx1 array + :param y: data + :type y: Nx1 array + :param extra_data: extra_data which is not used in student t distribution - not used + :returns: log likelihood evaluated for this point + :rtype: float + """ + link_f = self.gp_link.transf(f) + return self.logpdf_link(link_f, y, extra_data=extra_data) + + def dlogpdf_df(self, f, y, extra_data=None): + """ + Evaluates the link function link(f) then computes the derivative of log likelihood using it + Uses the Faa di Bruno's formula for the chain rule + + .. math:: + \\frac{d\\log p(y|\\lambda(f))}{df} = \\frac{d\\log p(y|\\lambda(f))}{d\\lambda(f)}\\frac{d\\lambda(f)}{df} + + :param f: latent variables f + :type f: Nx1 array + :param y: data + :type y: Nx1 array + :param extra_data: extra_data which is not used in student t distribution - not used + :returns: derivative of log likelihood evaluated for this point + :rtype: 1xN array + """ + link_f = self.gp_link.transf(f) + dlogpdf_dlink = self.dlogpdf_dlink(link_f, y, extra_data=extra_data) + dlink_df = self.gp_link.dtransf_df(f) + return chain_1(dlogpdf_dlink, dlink_df) + + def d2logpdf_df2(self, f, y, extra_data=None): + """ + Evaluates the link function link(f) then computes the second derivative of log likelihood using it + Uses the Faa di Bruno's formula for the chain rule + + .. math:: + \\frac{d^{2}\\log p(y|\\lambda(f))}{df^{2}} = \\frac{d^{2}\\log p(y|\\lambda(f))}{d^{2}\\lambda(f)}\\left(\\frac{d\\lambda(f)}{df}\\right)^{2} + \\frac{d\\log p(y|\\lambda(f))}{d\\lambda(f)}\\frac{d^{2}\\lambda(f)}{df^{2}} + + :param f: latent variables f + :type f: Nx1 array + :param y: data + :type y: Nx1 array + :param extra_data: extra_data which is not used in student t distribution - not used + :returns: second derivative of log likelihood evaluated for this point (diagonal only) + :rtype: 1xN array + """ + link_f = self.gp_link.transf(f) + d2logpdf_dlink2 = self.d2logpdf_dlink2(link_f, y, extra_data=extra_data) + dlink_df = self.gp_link.dtransf_df(f) + dlogpdf_dlink = self.dlogpdf_dlink(link_f, y, extra_data=extra_data) + d2link_df2 = self.gp_link.d2transf_df2(f) + return chain_2(d2logpdf_dlink2, dlink_df, dlogpdf_dlink, d2link_df2) + + def d3logpdf_df3(self, f, y, extra_data=None): + """ + Evaluates the link function link(f) then computes the third derivative of log likelihood using it + Uses the Faa di Bruno's formula for the chain rule + + .. math:: + \\frac{d^{3}\\log p(y|\\lambda(f))}{df^{3}} = \\frac{d^{3}\\log p(y|\\lambda(f)}{d\\lambda(f)^{3}}\\left(\\frac{d\\lambda(f)}{df}\\right)^{3} + 3\\frac{d^{2}\\log p(y|\\lambda(f)}{d\\lambda(f)^{2}}\\frac{d\\lambda(f)}{df}\\frac{d^{2}\\lambda(f)}{df^{2}} + \\frac{d\\log p(y|\\lambda(f)}{d\\lambda(f)}\\frac{d^{3}\\lambda(f)}{df^{3}} + + :param f: latent variables f + :type f: Nx1 array + :param y: data + :type y: Nx1 array + :param extra_data: extra_data which is not used in student t distribution - not used + :returns: third derivative of log likelihood evaluated for this point + :rtype: float + """ + link_f = self.gp_link.transf(f) + d3logpdf_dlink3 = self.d3logpdf_dlink3(link_f, y, extra_data=extra_data) + dlink_df = self.gp_link.dtransf_df(f) + d2logpdf_dlink2 = self.d2logpdf_dlink2(link_f, y, extra_data=extra_data) + d2link_df2 = self.gp_link.d2transf_df2(f) + dlogpdf_dlink = self.dlogpdf_dlink(link_f, y, extra_data=extra_data) + d3link_df3 = self.gp_link.d3transf_df3(f) + return chain_3(d3logpdf_dlink3, dlink_df, d2logpdf_dlink2, d2link_df2, dlogpdf_dlink, d3link_df3) + + def dlogpdf_dtheta(self, f, y, extra_data=None): + """ + TODO: Doc strings + """ + if len(self._get_param_names()) > 0: + link_f = self.gp_link.transf(f) + return self.dlogpdf_link_dtheta(link_f, y, extra_data=extra_data) + else: + #Is no parameters so return an empty array for its derivatives + return np.empty([1, 0]) + + def dlogpdf_df_dtheta(self, f, y, extra_data=None): + """ + TODO: Doc strings + """ + if len(self._get_param_names()) > 0: + link_f = self.gp_link.transf(f) + dlink_df = self.gp_link.dtransf_df(f) + dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(link_f, y, extra_data=extra_data) + return chain_1(dlogpdf_dlink_dtheta, dlink_df) + else: + #Is no parameters so return an empty array for its derivatives + return np.empty([f.shape[0], 0]) + + def d2logpdf_df2_dtheta(self, f, y, extra_data=None): + """ + TODO: Doc strings + """ + if len(self._get_param_names()) > 0: + link_f = self.gp_link.transf(f) + dlink_df = self.gp_link.dtransf_df(f) + d2link_df2 = self.gp_link.d2transf_df2(f) + d2logpdf_dlink2_dtheta = self.d2logpdf_dlink2_dtheta(link_f, y, extra_data=extra_data) + dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(link_f, y, extra_data=extra_data) + return chain_2(d2logpdf_dlink2_dtheta, dlink_df, dlogpdf_dlink_dtheta, d2link_df2) + else: + #Is no parameters so return an empty array for its derivatives + return np.empty([f.shape[0], 0]) + + def _laplace_gradients(self, f, y, extra_data=None): + dlogpdf_dtheta = self.dlogpdf_dtheta(f, y, extra_data=extra_data) + dlogpdf_df_dtheta = self.dlogpdf_df_dtheta(f, y, extra_data=extra_data) + d2logpdf_df2_dtheta = self.d2logpdf_df2_dtheta(f, y, extra_data=extra_data) + + #Parameters are stacked vertically. Must be listed in same order as 'get_param_names' + # ensure we have gradients for every parameter we want to optimize + assert dlogpdf_dtheta.shape[1] == len(self._get_param_names()) + assert dlogpdf_df_dtheta.shape[1] == len(self._get_param_names()) + assert d2logpdf_df2_dtheta.shape[1] == len(self._get_param_names()) + return dlogpdf_dtheta, dlogpdf_df_dtheta, d2logpdf_df2_dtheta + + def predictive_values(self, mu, var, full_cov=False, sampling=False, num_samples=10000): + """ + Compute mean, variance and conficence interval (percentiles 5 and 95) of the prediction. + + :param mu: mean of the latent variable, f, of posterior + :param var: variance of the latent variable, f, of posterior + :param full_cov: whether to use the full covariance or just the diagonal + :type full_cov: Boolean + :param num_samples: number of samples to use in computing quantiles and + possibly mean variance + :type num_samples: integer + :param sampling: Whether to use samples for mean and variances anyway + :type sampling: Boolean + + """ + + if sampling: + #Get gp_samples f* using posterior mean and variance + if not full_cov: + gp_samples = np.random.multivariate_normal(mu.flatten(), np.diag(var.flatten()), + size=num_samples).T + else: + gp_samples = np.random.multivariate_normal(mu.flatten(), var, + size=num_samples).T + #Push gp samples (f*) through likelihood to give p(y*|f*) + samples = self.samples(gp_samples) + axis=-1 + + #Calculate mean, variance and precentiles from samples + print "WARNING: Using sampling to calculate mean, variance and predictive quantiles." + pred_mean = np.mean(samples, axis=axis)[:,None] + pred_var = np.var(samples, axis=axis)[:,None] + q1 = np.percentile(samples, 2.5, axis=axis)[:,None] + q3 = np.percentile(samples, 97.5, axis=axis)[:,None] + + else: + + pred_mean = self.predictive_mean(mu, var) + pred_var = self.predictive_variance(mu, var, pred_mean) + print "WARNING: Predictive quantiles are only computed when sampling." + q1 = np.repeat(np.nan,pred_mean.size)[:,None] + q3 = q1.copy() + + return pred_mean, pred_var, q1, q3 + + def samples(self, gp): + """ + Returns a set of samples of observations based on a given value of the latent variable. + + :param gp: latent variable + """ + raise NotImplementedError diff --git a/GPy/likelihoods/link_functions.py b/GPy/likelihoods/link_functions.py new file mode 100644 index 00000000..9c046223 --- /dev/null +++ b/GPy/likelihoods/link_functions.py @@ -0,0 +1,158 @@ +# Copyright (c) 2012, 2013 The GPy authors +# Licensed under the BSD 3-clause license (see LICENSE.txt) + +import numpy as np +from scipy import stats +import scipy as sp +import pylab as pb +from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf,inv_std_norm_cdf + +class GPTransformation(object): + """ + Link function class for doing non-Gaussian likelihoods approximation + + :param Y: observed output (Nx1 numpy.darray) + + .. note:: Y values allowed depend on the likelihood_function used + + """ + def __init__(self): + pass + + def transf(self,f): + """ + Gaussian process tranformation function, latent space -> output space + """ + raise NotImplementedError + + def dtransf_df(self,f): + """ + derivative of transf(f) w.r.t. f + """ + raise NotImplementedError + + def d2transf_df2(self,f): + """ + second derivative of transf(f) w.r.t. f + """ + raise NotImplementedError + + def d3transf_df3(self,f): + """ + third derivative of transf(f) w.r.t. f + """ + raise NotImplementedError + +class Identity(GPTransformation): + """ + .. math:: + + g(f) = f + + """ + def transf(self,f): + return f + + def dtransf_df(self,f): + return np.ones_like(f) + + def d2transf_df2(self,f): + return np.zeros_like(f) + + def d3transf_df3(self,f): + return np.zeros_like(f) + + +class Probit(GPTransformation): + """ + .. math:: + + g(f) = \\Phi^{-1} (mu) + + """ + def transf(self,f): + return std_norm_cdf(f) + + def dtransf_df(self,f): + return std_norm_pdf(f) + + def d2transf_df2(self,f): + #FIXME + return -f * std_norm_pdf(f) + + def d3transf_df3(self,f): + #FIXME + f2 = f**2 + return -(1/(np.sqrt(2*np.pi)))*np.exp(-0.5*(f2))*(1-f2) + +class Log(GPTransformation): + """ + .. math:: + + g(f) = \\log(\\mu) + + """ + def transf(self,f): + return np.exp(f) + + def dtransf_df(self,f): + return np.exp(f) + + def d2transf_df2(self,f): + return np.exp(f) + + def d3transf_df3(self,f): + return np.exp(f) + +class Log_ex_1(GPTransformation): + """ + .. math:: + + g(f) = \\log(\\exp(\\mu) - 1) + + """ + def transf(self,f): + return np.log(1.+np.exp(f)) + + def dtransf_df(self,f): + return np.exp(f)/(1.+np.exp(f)) + + def d2transf_df2(self,f): + aux = np.exp(f)/(1.+np.exp(f)) + return aux*(1.-aux) + + def d3transf_df3(self,f): + aux = np.exp(f)/(1.+np.exp(f)) + daux_df = aux*(1.-aux) + return daux_df - (2.*aux*daux_df) + +class Reciprocal(GPTransformation): + def transf(self,f): + return 1./f + + def dtransf_df(self,f): + return -1./(f**2) + + def d2transf_df2(self,f): + return 2./(f**3) + + def d3transf_df3(self,f): + return -6./(f**4) + +class Heaviside(GPTransformation): + """ + + .. math:: + + g(f) = I_{x \\in A} + + """ + def transf(self,f): + #transformation goes here + return np.where(f>0, 1, 0) + + def dtransf_df(self,f): + raise NotImplementedError, "This function is not differentiable!" + + def d2transf_df2(self,f): + raise NotImplementedError, "This function is not differentiable!" diff --git a/GPy/likelihoods/poisson.py b/GPy/likelihoods/poisson.py new file mode 100644 index 00000000..73a10570 --- /dev/null +++ b/GPy/likelihoods/poisson.py @@ -0,0 +1,152 @@ +from __future__ import division +# Copyright (c) 2012, 2013 Ricardo Andrade +# Licensed under the BSD 3-clause license (see LICENSE.txt) + +import numpy as np +from scipy import stats,special +import scipy as sp +from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf +import gp_transformations +from likelihood import Likelihood + +class Poisson(Likelihood): + """ + Poisson likelihood + + .. math:: + p(y_{i}|\\lambda(f_{i})) = \\frac{\\lambda(f_{i})^{y_{i}}}{y_{i}!}e^{-\\lambda(f_{i})} + + .. Note:: + Y is expected to take values in {0,1,2,...} + """ + def __init__(self,gp_link=None,analytical_mean=False,analytical_variance=False): + super(Poisson, self).__init__(gp_link,analytical_mean,analytical_variance) + + def _preprocess_values(self,Y): + return Y + + def pdf_link(self, link_f, y, extra_data=None): + """ + Likelihood function given link(f) + + .. math:: + p(y_{i}|\\lambda(f_{i})) = \\frac{\\lambda(f_{i})^{y_{i}}}{y_{i}!}e^{-\\lambda(f_{i})} + + :param link_f: latent variables link(f) + :type link_f: Nx1 array + :param y: data + :type y: Nx1 array + :param extra_data: extra_data which is not used in poisson distribution + :returns: likelihood evaluated for this point + :rtype: float + """ + assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape + return np.prod(stats.poisson.pmf(y,link_f)) + + def logpdf_link(self, link_f, y, extra_data=None): + """ + Log Likelihood Function given link(f) + + .. math:: + \\ln p(y_{i}|\lambda(f_{i})) = -\\lambda(f_{i}) + y_{i}\\log \\lambda(f_{i}) - \\log y_{i}! + + :param link_f: latent variables (link(f)) + :type link_f: Nx1 array + :param y: data + :type y: Nx1 array + :param extra_data: extra_data which is not used in poisson distribution + :returns: likelihood evaluated for this point + :rtype: float + + """ + assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape + return np.sum(-link_f + y*np.log(link_f) - special.gammaln(y+1)) + + def dlogpdf_dlink(self, link_f, y, extra_data=None): + """ + Gradient of the log likelihood function at y, given link(f) w.r.t link(f) + + .. math:: + \\frac{d \\ln p(y_{i}|\lambda(f_{i}))}{d\\lambda(f)} = \\frac{y_{i}}{\\lambda(f_{i})} - 1 + + :param link_f: latent variables (f) + :type link_f: Nx1 array + :param y: data + :type y: Nx1 array + :param extra_data: extra_data which is not used in poisson distribution + :returns: gradient of likelihood evaluated at points + :rtype: Nx1 array + + """ + assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape + return y/link_f - 1 + + def d2logpdf_dlink2(self, link_f, y, extra_data=None): + """ + Hessian at y, given link(f), w.r.t link(f) + i.e. second derivative logpdf at y given link(f_i) and link(f_j) w.r.t link(f_i) and link(f_j) + The hessian will be 0 unless i == j + + .. math:: + \\frac{d^{2} \\ln p(y_{i}|\lambda(f_{i}))}{d^{2}\\lambda(f)} = \\frac{-y_{i}}{\\lambda(f_{i})^{2}} + + :param link_f: latent variables link(f) + :type link_f: Nx1 array + :param y: data + :type y: Nx1 array + :param extra_data: extra_data which is not used in poisson distribution + :returns: Diagonal of hessian matrix (second derivative of likelihood evaluated at points f) + :rtype: Nx1 array + + .. Note:: + Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases + (the distribution for y_i depends only on link(f_i) not on link(f_(j!=i)) + """ + assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape + hess = -y/(link_f**2) + return hess + #d2_df = self.gp_link.d2transf_df2(gp) + #transf = self.gp_link.transf(gp) + #return obs * ((self.gp_link.dtransf_df(gp)/transf)**2 - d2_df/transf) + d2_df + + def d3logpdf_dlink3(self, link_f, y, extra_data=None): + """ + Third order derivative log-likelihood function at y given link(f) w.r.t link(f) + + .. math:: + \\frac{d^{3} \\ln p(y_{i}|\lambda(f_{i}))}{d^{3}\\lambda(f)} = \\frac{2y_{i}}{\\lambda(f_{i})^{3}} + + :param link_f: latent variables link(f) + :type link_f: Nx1 array + :param y: data + :type y: Nx1 array + :param extra_data: extra_data which is not used in poisson distribution + :returns: third derivative of likelihood evaluated at points f + :rtype: Nx1 array + """ + assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape + d3lik_dlink3 = 2*y/(link_f)**3 + return d3lik_dlink3 + + def _mean(self,gp): + """ + Mass (or density) function + """ + return self.gp_link.transf(gp) + + def _variance(self,gp): + """ + Mass (or density) function + """ + return self.gp_link.transf(gp) + + def samples(self, gp): + """ + Returns a set of samples of observations based on a given value of the latent variable. + + :param gp: latent variable + """ + orig_shape = gp.shape + gp = gp.flatten() + Ysim = np.random.poisson(self.gp_link.transf(gp)) + return Ysim.reshape(orig_shape) diff --git a/GPy/likelihoods/student_t.py b/GPy/likelihoods/student_t.py new file mode 100644 index 00000000..3336235b --- /dev/null +++ b/GPy/likelihoods/student_t.py @@ -0,0 +1,277 @@ +# Copyright (c) 2012, 2013 Ricardo Andrade +# Licensed under the BSD 3-clause license (see LICENSE.txt) + +import numpy as np +from scipy import stats, special +import scipy as sp +import gp_transformations +from scipy import stats, integrate +from scipy.special import gammaln, gamma +from likelihood import Likelihood + +class StudentT(Likelihood): + """ + Student T likelihood + + For nomanclature see Bayesian Data Analysis 2003 p576 + + .. math:: + p(y_{i}|\\lambda(f_{i})) = \\frac{\\Gamma\\left(\\frac{v+1}{2}\\right)}{\\Gamma\\left(\\frac{v}{2}\\right)\\sqrt{v\\pi\\sigma^{2}}}\\left(1 + \\frac{1}{v}\\left(\\frac{(y_{i} - f_{i})^{2}}{\\sigma^{2}}\\right)\\right)^{\\frac{-v+1}{2}} + + """ + def __init__(self,gp_link=None,analytical_mean=True,analytical_variance=True, deg_free=5, sigma2=2): + self.v = deg_free + self.sigma2 = sigma2 + + self._set_params(np.asarray(sigma2)) + super(StudentT, self).__init__(gp_link,analytical_mean,analytical_variance) + self.log_concave = False + + def _get_params(self): + return np.asarray(self.sigma2) + + def _get_param_names(self): + return ["t_noise_std2"] + + def _set_params(self, x): + self.sigma2 = float(x) + + @property + def variance(self, extra_data=None): + return (self.v / float(self.v - 2)) * self.sigma2 + + def pdf_link(self, link_f, y, extra_data=None): + """ + Likelihood function given link(f) + + .. math:: + p(y_{i}|\\lambda(f_{i})) = \\frac{\\Gamma\\left(\\frac{v+1}{2}\\right)}{\\Gamma\\left(\\frac{v}{2}\\right)\\sqrt{v\\pi\\sigma^{2}}}\\left(1 + \\frac{1}{v}\\left(\\frac{(y_{i} - \\lambda(f_{i}))^{2}}{\\sigma^{2}}\\right)\\right)^{\\frac{-v+1}{2}} + + :param link_f: latent variables link(f) + :type link_f: Nx1 array + :param y: data + :type y: Nx1 array + :param extra_data: extra_data which is not used in student t distribution + :returns: likelihood evaluated for this point + :rtype: float + """ + assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape + e = y - link_f + #Careful gamma(big_number) is infinity! + objective = ((np.exp(gammaln((self.v + 1)*0.5) - gammaln(self.v * 0.5)) + / (np.sqrt(self.v * np.pi * self.sigma2))) + * ((1 + (1./float(self.v))*((e**2)/float(self.sigma2)))**(-0.5*(self.v + 1))) + ) + return np.prod(objective) + + def logpdf_link(self, link_f, y, extra_data=None): + """ + Log Likelihood Function given link(f) + + .. math:: + \\ln p(y_{i}|\lambda(f_{i})) = \\ln \\Gamma\\left(\\frac{v+1}{2}\\right) - \\ln \\Gamma\\left(\\frac{v}{2}\\right) - \\ln \\sqrt{v \\pi\\sigma^{2}} - \\frac{v+1}{2}\\ln \\left(1 + \\frac{1}{v}\\left(\\frac{(y_{i} - \lambda(f_{i}))^{2}}{\\sigma^{2}}\\right)\\right) + + :param link_f: latent variables (link(f)) + :type link_f: Nx1 array + :param y: data + :type y: Nx1 array + :param extra_data: extra_data which is not used in student t distribution + :returns: likelihood evaluated for this point + :rtype: float + + """ + assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape + e = y - link_f + objective = (+ gammaln((self.v + 1) * 0.5) + - gammaln(self.v * 0.5) + - 0.5*np.log(self.sigma2 * self.v * np.pi) + - 0.5*(self.v + 1)*np.log(1 + (1/np.float(self.v))*((e**2)/self.sigma2)) + ) + return np.sum(objective) + + def dlogpdf_dlink(self, link_f, y, extra_data=None): + """ + Gradient of the log likelihood function at y, given link(f) w.r.t link(f) + + .. math:: + \\frac{d \\ln p(y_{i}|\lambda(f_{i}))}{d\\lambda(f)} = \\frac{(v+1)(y_{i}-\lambda(f_{i}))}{(y_{i}-\lambda(f_{i}))^{2} + \\sigma^{2}v} + + :param link_f: latent variables (f) + :type link_f: Nx1 array + :param y: data + :type y: Nx1 array + :param extra_data: extra_data which is not used in student t distribution + :returns: gradient of likelihood evaluated at points + :rtype: Nx1 array + + """ + assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape + e = y - link_f + grad = ((self.v + 1) * e) / (self.v * self.sigma2 + (e**2)) + return grad + + def d2logpdf_dlink2(self, link_f, y, extra_data=None): + """ + Hessian at y, given link(f), w.r.t link(f) + i.e. second derivative logpdf at y given link(f_i) and link(f_j) w.r.t link(f_i) and link(f_j) + The hessian will be 0 unless i == j + + .. math:: + \\frac{d^{2} \\ln p(y_{i}|\lambda(f_{i}))}{d^{2}\\lambda(f)} = \\frac{(v+1)((y_{i}-\lambda(f_{i}))^{2} - \\sigma^{2}v)}{((y_{i}-\lambda(f_{i}))^{2} + \\sigma^{2}v)^{2}} + + :param link_f: latent variables link(f) + :type link_f: Nx1 array + :param y: data + :type y: Nx1 array + :param extra_data: extra_data which is not used in student t distribution + :returns: Diagonal of hessian matrix (second derivative of likelihood evaluated at points f) + :rtype: Nx1 array + + .. Note:: + Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases + (the distribution for y_i depends only on link(f_i) not on link(f_(j!=i)) + """ + assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape + e = y - link_f + hess = ((self.v + 1)*(e**2 - self.v*self.sigma2)) / ((self.sigma2*self.v + e**2)**2) + return hess + + def d3logpdf_dlink3(self, link_f, y, extra_data=None): + """ + Third order derivative log-likelihood function at y given link(f) w.r.t link(f) + + .. math:: + \\frac{d^{3} \\ln p(y_{i}|\lambda(f_{i}))}{d^{3}\\lambda(f)} = \\frac{-2(v+1)((y_{i} - \lambda(f_{i}))^3 - 3(y_{i} - \lambda(f_{i})) \\sigma^{2} v))}{((y_{i} - \lambda(f_{i})) + \\sigma^{2} v)^3} + + :param link_f: latent variables link(f) + :type link_f: Nx1 array + :param y: data + :type y: Nx1 array + :param extra_data: extra_data which is not used in student t distribution + :returns: third derivative of likelihood evaluated at points f + :rtype: Nx1 array + """ + assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape + e = y - link_f + d3lik_dlink3 = ( -(2*(self.v + 1)*(-e)*(e**2 - 3*self.v*self.sigma2)) / + ((e**2 + self.sigma2*self.v)**3) + ) + return d3lik_dlink3 + + def dlogpdf_link_dvar(self, link_f, y, extra_data=None): + """ + Gradient of the log-likelihood function at y given f, w.r.t variance parameter (t_noise) + + .. math:: + \\frac{d \\ln p(y_{i}|\lambda(f_{i}))}{d\\sigma^{2}} = \\frac{v((y_{i} - \lambda(f_{i}))^{2} - \\sigma^{2})}{2\\sigma^{2}(\\sigma^{2}v + (y_{i} - \lambda(f_{i}))^{2})} + + :param link_f: latent variables link(f) + :type link_f: Nx1 array + :param y: data + :type y: Nx1 array + :param extra_data: extra_data which is not used in student t distribution + :returns: derivative of likelihood evaluated at points f w.r.t variance parameter + :rtype: float + """ + assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape + e = y - link_f + dlogpdf_dvar = self.v*(e**2 - self.sigma2)/(2*self.sigma2*(self.sigma2*self.v + e**2)) + return np.sum(dlogpdf_dvar) + + def dlogpdf_dlink_dvar(self, link_f, y, extra_data=None): + """ + Derivative of the dlogpdf_dlink w.r.t variance parameter (t_noise) + + .. math:: + \\frac{d}{d\\sigma^{2}}(\\frac{d \\ln p(y_{i}|\lambda(f_{i}))}{df}) = \\frac{-2\\sigma v(v + 1)(y_{i}-\lambda(f_{i}))}{(y_{i}-\lambda(f_{i}))^2 + \\sigma^2 v)^2} + + :param link_f: latent variables link_f + :type link_f: Nx1 array + :param y: data + :type y: Nx1 array + :param extra_data: extra_data which is not used in student t distribution + :returns: derivative of likelihood evaluated at points f w.r.t variance parameter + :rtype: Nx1 array + """ + assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape + e = y - link_f + dlogpdf_dlink_dvar = (self.v*(self.v+1)*(-e))/((self.sigma2*self.v + e**2)**2) + return dlogpdf_dlink_dvar + + def d2logpdf_dlink2_dvar(self, link_f, y, extra_data=None): + """ + Gradient of the hessian (d2logpdf_dlink2) w.r.t variance parameter (t_noise) + + .. math:: + \\frac{d}{d\\sigma^{2}}(\\frac{d^{2} \\ln p(y_{i}|\lambda(f_{i}))}{d^{2}f}) = \\frac{v(v+1)(\\sigma^{2}v - 3(y_{i} - \lambda(f_{i}))^{2})}{(\\sigma^{2}v + (y_{i} - \lambda(f_{i}))^{2})^{3}} + + :param link_f: latent variables link(f) + :type link_f: Nx1 array + :param y: data + :type y: Nx1 array + :param extra_data: extra_data which is not used in student t distribution + :returns: derivative of hessian evaluated at points f and f_j w.r.t variance parameter + :rtype: Nx1 array + """ + assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape + e = y - link_f + d2logpdf_dlink2_dvar = ( (self.v*(self.v+1)*(self.sigma2*self.v - 3*(e**2))) + / ((self.sigma2*self.v + (e**2))**3) + ) + return d2logpdf_dlink2_dvar + + def dlogpdf_link_dtheta(self, f, y, extra_data=None): + dlogpdf_dvar = self.dlogpdf_link_dvar(f, y, extra_data=extra_data) + return np.asarray([[dlogpdf_dvar]]) + + def dlogpdf_dlink_dtheta(self, f, y, extra_data=None): + dlogpdf_dlink_dvar = self.dlogpdf_dlink_dvar(f, y, extra_data=extra_data) + return dlogpdf_dlink_dvar + + def d2logpdf_dlink2_dtheta(self, f, y, extra_data=None): + d2logpdf_dlink2_dvar = self.d2logpdf_dlink2_dvar(f, y, extra_data=extra_data) + return d2logpdf_dlink2_dvar + + def _predictive_variance_analytical(self, mu, sigma, predictive_mean=None): + """ + Compute predictive variance of student_t*normal p(y*|f*)p(f*) + + Need to find what the variance is at the latent points for a student t*normal p(y*|f*)p(f*) + (((g((v+1)/2))/(g(v/2)*s*sqrt(v*pi)))*(1+(1/v)*((y-f)/s)^2)^(-(v+1)/2)) + *((1/(s*sqrt(2*pi)))*exp(-(1/(2*(s^2)))*((y-f)^2))) + """ + + #FIXME: Not correct + #We want the variance around test points y which comes from int p(y*|f*)p(f*) df* + #Var(y*) = Var(E[y*|f*]) + E[Var(y*|f*)] + #Since we are given f* (mu) which is our mean (expected) value of y*|f* then the variance is the variance around this + #Which was also given to us as (var) + #We also need to know the expected variance of y* around samples f*, this is the variance of the student t distribution + #However the variance of the student t distribution is not dependent on f, only on sigma and the degrees of freedom + true_var = 1/(1/sigma**2 + 1/self.variance) + + return true_var + + def _predictive_mean_analytical(self, mu, sigma): + """ + Compute mean of the prediction + """ + #FIXME: Not correct + return mu + + def samples(self, gp): + """ + Returns a set of samples of observations based on a given value of the latent variable. + + :param gp: latent variable + """ + orig_shape = gp.shape + gp = gp.flatten() + #FIXME: Very slow as we are computing a new random variable per input! + #Can't get it to sample all at the same time + #student_t_samples = np.array([stats.t.rvs(self.v, self.gp_link.transf(gpj),scale=np.sqrt(self.sigma2), size=1) for gpj in gp]) + dfs = np.ones_like(gp)*self.v + scales = np.ones_like(gp)*np.sqrt(self.sigma2) + student_t_samples = stats.t.rvs(dfs, loc=self.gp_link.transf(gp), + scale=scales) + return student_t_samples.reshape(orig_shape)