Merge branch 'params' of github.com:SheffieldML/GPy into params

Conflicts: GPy/likelihoods/mixed_noise.py
2026-05-30 14:35:15 +02:00 · 2014-03-19 09:39:35 +00:00 · 2014-03-19 09:39:35 +00:00 · fcb6742b60
commit fcb6742b60
parent 16ffc11c97 b96ff9cdfb
21 changed files with 197 additions and 228 deletions
--- a/GPy/likelihoods/bernoulli.py
+++ b/GPy/likelihoods/bernoulli.py
@ -95,7 +95,7 @@ class Bernoulli(Likelihood):
        else:
            return np.nan

-    def pdf_link(self, link_f, y, extra_data=None):
+    def pdf_link(self, link_f, y, Y_metadata=None):
        """
        Likelihood function given link(f)

@ -106,7 +106,7 @@ class Bernoulli(Likelihood):
        :type link_f: Nx1 array
        :param y: data
        :type y: Nx1 array
-        :param extra_data: extra_data not used in bernoulli
+        :param Y_metadata: Y_metadata not used in bernoulli
        :returns: likelihood evaluated for this point
        :rtype: float

@ -118,7 +118,7 @@ class Bernoulli(Likelihood):
        objective = np.where(y, link_f, 1.-link_f)
        return np.exp(np.sum(np.log(objective)))

-    def logpdf_link(self, link_f, y, extra_data=None):
+    def logpdf_link(self, link_f, y, Y_metadata=None):
        """
        Log Likelihood function given link(f)

@ -129,7 +129,7 @@ class Bernoulli(Likelihood):
        :type link_f: Nx1 array
        :param y: data
        :type y: Nx1 array
-        :param extra_data: extra_data not used in bernoulli
+        :param Y_metadata: Y_metadata not used in bernoulli
        :returns: log likelihood evaluated at points link(f)
        :rtype: float
        """
@ -140,7 +140,7 @@ class Bernoulli(Likelihood):
        np.seterr(**state)
        return np.sum(objective)

-    def dlogpdf_dlink(self, link_f, y, extra_data=None):
+    def dlogpdf_dlink(self, link_f, y, Y_metadata=None):
        """
        Gradient of the pdf at y, given link(f) w.r.t link(f)

@ -151,7 +151,7 @@ class Bernoulli(Likelihood):
        :type link_f: Nx1 array
        :param y: data
        :type y: Nx1 array
-        :param extra_data: extra_data not used in bernoulli
+        :param Y_metadata: Y_metadata not used in bernoulli
        :returns: gradient of log likelihood evaluated at points link(f)
        :rtype: Nx1 array
        """
@ -162,7 +162,7 @@ class Bernoulli(Likelihood):
        np.seterr(**state)
        return grad

-    def d2logpdf_dlink2(self, link_f, y, extra_data=None):
+    def d2logpdf_dlink2(self, link_f, y, Y_metadata=None):
        """
        Hessian at y, given link_f, w.r.t link_f the hessian will be 0 unless i == j
        i.e. second derivative logpdf at y given link(f_i) link(f_j)  w.r.t link(f_i) and link(f_j)
@ -175,7 +175,7 @@ class Bernoulli(Likelihood):
        :type link_f: Nx1 array
        :param y: data
        :type y: Nx1 array
-        :param extra_data: extra_data not used in bernoulli
+        :param Y_metadata: Y_metadata not used in bernoulli
        :returns: Diagonal of log hessian matrix (second derivative of log likelihood evaluated at points link(f))
        :rtype: Nx1 array

@ -190,7 +190,7 @@ class Bernoulli(Likelihood):
        np.seterr(**state)
        return d2logpdf_dlink2

-    def d3logpdf_dlink3(self, link_f, y, extra_data=None):
+    def d3logpdf_dlink3(self, link_f, y, Y_metadata=None):
        """
        Third order derivative log-likelihood function at y given link(f) w.r.t link(f)

@ -201,7 +201,7 @@ class Bernoulli(Likelihood):
        :type link_f: Nx1 array
        :param y: data
        :type y: Nx1 array
-        :param extra_data: extra_data not used in bernoulli
+        :param Y_metadata: Y_metadata not used in bernoulli
        :returns: third derivative of log likelihood evaluated at points link(f)
        :rtype: Nx1 array
        """
--- a/GPy/likelihoods/exponential.py
+++ b/GPy/likelihoods/exponential.py
@ -18,13 +18,12 @@ class Exponential(Likelihood):
    L(x) = \exp(\lambda) * \lambda**Y_i / Y_i!
    $$
    """
-    def __init__(self,gp_link=None,analytical_mean=False,analytical_variance=False):
-        super(Exponential, self).__init__(gp_link,analytical_mean,analytical_variance)
+    def __init__(self,gp_link=None):
+        if gp_link is None:
+            gp_link = link_functions.Log()
+        super(Exponential, self).__init__(gp_link, 'ExpLikelihood')

-    def _preprocess_values(self,Y):
-        return Y
-
-    def pdf_link(self, link_f, y, extra_data=None):
+    def pdf_link(self, link_f, y, Y_metadata=None):
        """
        Likelihood function given link(f)

@ -35,16 +34,15 @@ class Exponential(Likelihood):
        :type link_f: Nx1 array
        :param y: data
        :type y: Nx1 array
-        :param extra_data: extra_data which is not used in exponential distribution
+        :param Y_metadata: Y_metadata which is not used in exponential distribution
        :returns: likelihood evaluated for this point
        :rtype: float
        """
        assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
        log_objective = link_f*np.exp(-y*link_f)
        return np.exp(np.sum(np.log(log_objective)))
-        #return np.exp(np.sum(-y/link_f - np.log(link_f) ))

-    def logpdf_link(self, link_f, y, extra_data=None):
+    def logpdf_link(self, link_f, y, Y_metadata=None):
        """
        Log Likelihood Function given link(f)

@ -55,17 +53,16 @@ class Exponential(Likelihood):
        :type link_f: Nx1 array
        :param y: data
        :type y: Nx1 array
-        :param extra_data: extra_data which is not used in exponential distribution
+        :param Y_metadata: Y_metadata which is not used in exponential distribution
        :returns: likelihood evaluated for this point
        :rtype: float

        """
        assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
        log_objective = np.log(link_f) - y*link_f
-        #logpdf_link = np.sum(-np.log(link_f) - y/link_f)
        return np.sum(log_objective)

-    def dlogpdf_dlink(self, link_f, y, extra_data=None):
+    def dlogpdf_dlink(self, link_f, y, Y_metadata=None):
        """
        Gradient of the log likelihood function at y, given link(f) w.r.t link(f)

@ -76,7 +73,7 @@ class Exponential(Likelihood):
        :type link_f: Nx1 array
        :param y: data
        :type y: Nx1 array
-        :param extra_data: extra_data which is not used in exponential distribution
+        :param Y_metadata: Y_metadata which is not used in exponential distribution
        :returns: gradient of likelihood evaluated at points
        :rtype: Nx1 array

@ -86,7 +83,7 @@ class Exponential(Likelihood):
        #grad = y/(link_f**2) - 1./link_f
        return grad

-    def d2logpdf_dlink2(self, link_f, y, extra_data=None):
+    def d2logpdf_dlink2(self, link_f, y, Y_metadata=None):
        """
        Hessian at y, given link(f), w.r.t link(f)
        i.e. second derivative logpdf at y given link(f_i) and link(f_j)  w.r.t link(f_i) and link(f_j)
@ -99,7 +96,7 @@ class Exponential(Likelihood):
        :type link_f: Nx1 array
        :param y: data
        :type y: Nx1 array
-        :param extra_data: extra_data which is not used in exponential distribution
+        :param Y_metadata: Y_metadata which is not used in exponential distribution
        :returns: Diagonal of hessian matrix (second derivative of likelihood evaluated at points f)
        :rtype: Nx1 array

@ -112,7 +109,7 @@ class Exponential(Likelihood):
        #hess = -2*y/(link_f**3) + 1/(link_f**2)
        return hess

-    def d3logpdf_dlink3(self, link_f, y, extra_data=None):
+    def d3logpdf_dlink3(self, link_f, y, Y_metadata=None):
        """
        Third order derivative log-likelihood function at y given link(f) w.r.t link(f)

@ -123,7 +120,7 @@ class Exponential(Likelihood):
        :type link_f: Nx1 array
        :param y: data
        :type y: Nx1 array
-        :param extra_data: extra_data which is not used in exponential distribution
+        :param Y_metadata: Y_metadata which is not used in exponential distribution
        :returns: third derivative of likelihood evaluated at points f
        :rtype: Nx1 array
        """
@ -132,18 +129,6 @@ class Exponential(Likelihood):
        #d3lik_dlink3 = 6*y/(link_f**4) - 2./(link_f**3)
        return d3lik_dlink3

-    def _mean(self,gp):
-        """
-        Mass (or density) function
-        """
-        return self.gp_link.transf(gp)
-
-    def _variance(self,gp):
-        """
-        Mass (or density) function
-        """
-        return self.gp_link.transf(gp)**2
-
    def samples(self, gp):
        """
        Returns a set of samples of observations based on a given value of the latent variable.
--- a/GPy/likelihoods/gamma.py
+++ b/GPy/likelihoods/gamma.py
@ -1,11 +1,12 @@
-# Copyright (c) 2012, 2013 Ricardo Andrade
+# Copyright (c) 2012 - 2014, GPy authors (see AUTHORS.txt).
 # Licensed under the BSD 3-clause license (see LICENSE.txt)


 import numpy as np
 from scipy import stats,special
 import scipy as sp
-from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
+from ..util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
+from ..core.parameterization import Param
 import link_functions
 from likelihood import Likelihood

@ -18,14 +19,16 @@ class Gamma(Likelihood):
        \\alpha_{i} = \\beta y_{i}

    """
-    def __init__(self,gp_link=None,analytical_mean=False,analytical_variance=False,beta=1.):
-        self.beta = beta
-        super(Gamma, self).__init__(gp_link,analytical_mean,analytical_variance)
+    def __init__(self,gp_link=None,beta=1.):
+        if gp_link is None:
+            gp_link = link_functions.Log()
+        super(Gamma, self).__init__(gp_link, 'Gamma')

-    def _preprocess_values(self,Y):
-        return Y
+        self.beta = Param('beta', beta)
+        self.add_parameter(self.beta)
+        self.beta.fix()#TODO: gradients!

-    def pdf_link(self, link_f, y, extra_data=None):
+    def pdf_link(self, link_f, y, Y_metadata=None):
        """
        Likelihood function given link(f)

@ -37,7 +40,7 @@ class Gamma(Likelihood):
        :type link_f: Nx1 array
        :param y: data
        :type y: Nx1 array
-        :param extra_data: extra_data which is not used in poisson distribution
+        :param Y_metadata: Y_metadata which is not used in gamma distribution
        :returns: likelihood evaluated for this point
        :rtype: float
        """
@ -47,7 +50,7 @@ class Gamma(Likelihood):
        objective = (y**(alpha - 1.) * np.exp(-self.beta*y) * self.beta**alpha)/ special.gamma(alpha)
        return np.exp(np.sum(np.log(objective)))

-    def logpdf_link(self, link_f, y, extra_data=None):
+    def logpdf_link(self, link_f, y, Y_metadata=None):
        """
        Log Likelihood Function given link(f)

@ -59,7 +62,7 @@ class Gamma(Likelihood):
        :type link_f: Nx1 array
        :param y: data
        :type y: Nx1 array
-        :param extra_data: extra_data which is not used in poisson distribution
+        :param Y_metadata: Y_metadata which is not used in gamma distribution
        :returns: likelihood evaluated for this point
        :rtype: float

@ -71,7 +74,7 @@ class Gamma(Likelihood):
        log_objective = alpha*np.log(self.beta) - np.log(special.gamma(alpha)) + (alpha - 1)*np.log(y) - self.beta*y
        return np.sum(log_objective)

-    def dlogpdf_dlink(self, link_f, y, extra_data=None):
+    def dlogpdf_dlink(self, link_f, y, Y_metadata=None):
        """
        Gradient of the log likelihood function at y, given link(f) w.r.t link(f)

@ -83,7 +86,7 @@ class Gamma(Likelihood):
        :type link_f: Nx1 array
        :param y: data
        :type y: Nx1 array
-        :param extra_data: extra_data which is not used in gamma distribution
+        :param Y_metadata: Y_metadata which is not used in gamma distribution
        :returns: gradient of likelihood evaluated at points
        :rtype: Nx1 array

@ -94,7 +97,7 @@ class Gamma(Likelihood):
        #return -self.gp_link.dtransf_df(gp)*self.beta*np.log(obs) + special.psi(self.gp_link.transf(gp)*self.beta) * self.gp_link.dtransf_df(gp)*self.beta
        return grad

-    def d2logpdf_dlink2(self, link_f, y, extra_data=None):
+    def d2logpdf_dlink2(self, link_f, y, Y_metadata=None):
        """
        Hessian at y, given link(f), w.r.t link(f)
        i.e. second derivative logpdf at y given link(f_i) and link(f_j)  w.r.t link(f_i) and link(f_j)
@ -108,7 +111,7 @@ class Gamma(Likelihood):
        :type link_f: Nx1 array
        :param y: data
        :type y: Nx1 array
-        :param extra_data: extra_data which is not used in gamma distribution
+        :param Y_metadata: Y_metadata which is not used in gamma distribution
        :returns: Diagonal of hessian matrix (second derivative of likelihood evaluated at points f)
        :rtype: Nx1 array

@ -122,7 +125,7 @@ class Gamma(Likelihood):
        #return -self.gp_link.d2transf_df2(gp)*self.beta*np.log(obs) + special.polygamma(1,self.gp_link.transf(gp)*self.beta)*(self.gp_link.dtransf_df(gp)*self.beta)**2 + special.psi(self.gp_link.transf(gp)*self.beta)*self.gp_link.d2transf_df2(gp)*self.beta
        return hess

-    def d3logpdf_dlink3(self, link_f, y, extra_data=None):
+    def d3logpdf_dlink3(self, link_f, y, Y_metadata=None):
        """
        Third order derivative log-likelihood function at y given link(f) w.r.t link(f)

@ -134,22 +137,10 @@ class Gamma(Likelihood):
        :type link_f: Nx1 array
        :param y: data
        :type y: Nx1 array
-        :param extra_data: extra_data which is not used in gamma distribution
+        :param Y_metadata: Y_metadata which is not used in gamma distribution
        :returns: third derivative of likelihood evaluated at points f
        :rtype: Nx1 array
        """
        assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
        d3lik_dlink3 = -special.polygamma(2, self.beta*link_f)*(self.beta**3)
        return d3lik_dlink3
-
-    def _mean(self,gp):
-        """
-        Mass (or density) function
-        """
-        return self.gp_link.transf(gp)
-
-    def _variance(self,gp):
-        """
-        Mass (or density) function
-        """
-        return self.gp_link.transf(gp)/self.beta
--- a/GPy/likelihoods/gaussian.py
+++ b/GPy/likelihoods/gaussian.py
@ -35,12 +35,7 @@ class Gaussian(Likelihood):
        if gp_link is None:
            gp_link = link_functions.Identity()

-        if isinstance(gp_link, link_functions.Identity):
-            analytical_variance = True
-            analytical_mean = True
-        else:
-            analytical_variance = False
-            analytical_mean = False
+        assert isinstance(gp_link, link_functions.Identity), "the likelihood only implemented for the identity link"

        super(Gaussian, self).__init__(gp_link, name=name)

@ -100,7 +95,7 @@ class Gaussian(Likelihood):
    def predictive_quantiles(self, mu, var, quantiles, Y_metadata=None):
        return  [stats.norm.ppf(q/100.)*np.sqrt(var) + mu for q in quantiles]

-    def pdf_link(self, link_f, y, extra_data=None):
+    def pdf_link(self, link_f, y, Y_metadata=None):
        """
        Likelihood function given link(f)

@ -111,14 +106,14 @@ class Gaussian(Likelihood):
        :type link_f: Nx1 array
        :param y: data
        :type y: Nx1 array
-        :param extra_data: extra_data not used in gaussian
+        :param Y_metadata: Y_metadata not used in gaussian
        :returns: likelihood evaluated for this point
        :rtype: float
        """
        #Assumes no covariance, exp, sum, log for numerical stability
        return np.exp(np.sum(np.log(stats.norm.pdf(y, link_f, np.sqrt(self.variance)))))

-    def logpdf_link(self, link_f, y, extra_data=None):
+    def logpdf_link(self, link_f, y, Y_metadata=None):
        """
        Log likelihood function given link(f)

@ -129,7 +124,7 @@ class Gaussian(Likelihood):
        :type link_f: Nx1 array
        :param y: data
        :type y: Nx1 array
-        :param extra_data: extra_data not used in gaussian
+        :param Y_metadata: Y_metadata not used in gaussian
        :returns: log likelihood evaluated for this point
        :rtype: float
        """
@ -139,7 +134,7 @@ class Gaussian(Likelihood):

        return -0.5*(np.sum((y-link_f)**2/self.variance) + ln_det_cov + N*np.log(2.*np.pi))

-    def dlogpdf_dlink(self, link_f, y, extra_data=None):
+    def dlogpdf_dlink(self, link_f, y, Y_metadata=None):
        """
        Gradient of the pdf at y, given link(f) w.r.t link(f)

@ -150,7 +145,7 @@ class Gaussian(Likelihood):
        :type link_f: Nx1 array
        :param y: data
        :type y: Nx1 array
-        :param extra_data: extra_data not used in gaussian
+        :param Y_metadata: Y_metadata not used in gaussian
        :returns: gradient of log likelihood evaluated at points link(f)
        :rtype: Nx1 array
        """
@ -159,7 +154,7 @@ class Gaussian(Likelihood):
        grad = s2_i*y - s2_i*link_f
        return grad

-    def d2logpdf_dlink2(self, link_f, y, extra_data=None):
+    def d2logpdf_dlink2(self, link_f, y, Y_metadata=None):
        """
        Hessian at y, given link_f, w.r.t link_f.
        i.e. second derivative logpdf at y given link(f_i) link(f_j)  w.r.t link(f_i) and link(f_j)
@ -173,7 +168,7 @@ class Gaussian(Likelihood):
        :type link_f: Nx1 array
        :param y: data
        :type y: Nx1 array
-        :param extra_data: extra_data not used in gaussian
+        :param Y_metadata: Y_metadata not used in gaussian
        :returns: Diagonal of log hessian matrix (second derivative of log likelihood evaluated at points link(f))
        :rtype: Nx1 array

@ -186,7 +181,7 @@ class Gaussian(Likelihood):
        hess = -(1.0/self.variance)*np.ones((N, 1))
        return hess

-    def d3logpdf_dlink3(self, link_f, y, extra_data=None):
+    def d3logpdf_dlink3(self, link_f, y, Y_metadata=None):
        """
        Third order derivative log-likelihood function at y given link(f) w.r.t link(f)

@ -197,7 +192,7 @@ class Gaussian(Likelihood):
        :type link_f: Nx1 array
        :param y: data
        :type y: Nx1 array
-        :param extra_data: extra_data not used in gaussian
+        :param Y_metadata: Y_metadata not used in gaussian
        :returns: third derivative of log likelihood evaluated at points link(f)
        :rtype: Nx1 array
        """
@ -206,7 +201,7 @@ class Gaussian(Likelihood):
        d3logpdf_dlink3 = np.zeros((N,1))
        return d3logpdf_dlink3

-    def dlogpdf_link_dvar(self, link_f, y, extra_data=None):
+    def dlogpdf_link_dvar(self, link_f, y, Y_metadata=None):
        """
        Gradient of the log-likelihood function at y given link(f), w.r.t variance parameter (noise_variance)

@ -217,7 +212,7 @@ class Gaussian(Likelihood):
        :type link_f: Nx1 array
        :param y: data
        :type y: Nx1 array
-        :param extra_data: extra_data not used in gaussian
+        :param Y_metadata: Y_metadata not used in gaussian
        :returns: derivative of log likelihood evaluated at points link(f) w.r.t variance parameter
        :rtype: float
        """
@ -228,7 +223,7 @@ class Gaussian(Likelihood):
        dlik_dsigma = -0.5*N/self.variance + 0.5*s_4*np.sum(np.square(e))
        return np.sum(dlik_dsigma) # Sure about this sum?

-    def dlogpdf_dlink_dvar(self, link_f, y, extra_data=None):
+    def dlogpdf_dlink_dvar(self, link_f, y, Y_metadata=None):
        """
        Derivative of the dlogpdf_dlink w.r.t variance parameter (noise_variance)

@ -239,7 +234,7 @@ class Gaussian(Likelihood):
        :type link_f: Nx1 array
        :param y: data
        :type y: Nx1 array
-        :param extra_data: extra_data not used in gaussian
+        :param Y_metadata: Y_metadata not used in gaussian
        :returns: derivative of log likelihood evaluated at points link(f) w.r.t variance parameter
        :rtype: Nx1 array
        """
@ -248,7 +243,7 @@ class Gaussian(Likelihood):
        dlik_grad_dsigma = -s_4*y + s_4*link_f
        return dlik_grad_dsigma

-    def d2logpdf_dlink2_dvar(self, link_f, y, extra_data=None):
+    def d2logpdf_dlink2_dvar(self, link_f, y, Y_metadata=None):
        """
        Gradient of the hessian (d2logpdf_dlink2) w.r.t variance parameter (noise_variance)

@ -259,7 +254,7 @@ class Gaussian(Likelihood):
        :type link_f: Nx1 array
        :param y: data
        :type y: Nx1 array
-        :param extra_data: extra_data not used in gaussian
+        :param Y_metadata: Y_metadata not used in gaussian
        :returns: derivative of log hessian evaluated at points link(f_i) and link(f_j) w.r.t variance parameter
        :rtype: Nx1 array
        """
@ -269,16 +264,16 @@ class Gaussian(Likelihood):
        d2logpdf_dlink2_dvar = np.ones((N,1))*s_4
        return d2logpdf_dlink2_dvar

-    def dlogpdf_link_dtheta(self, f, y, extra_data=None):
-        dlogpdf_dvar = self.dlogpdf_link_dvar(f, y, extra_data=extra_data)
+    def dlogpdf_link_dtheta(self, f, y, Y_metadata=None):
+        dlogpdf_dvar = self.dlogpdf_link_dvar(f, y, Y_metadata=Y_metadata)
        return np.asarray([[dlogpdf_dvar]])

-    def dlogpdf_dlink_dtheta(self, f, y, extra_data=None):
-        dlogpdf_dlink_dvar = self.dlogpdf_dlink_dvar(f, y, extra_data=extra_data)
+    def dlogpdf_dlink_dtheta(self, f, y, Y_metadata=None):
+        dlogpdf_dlink_dvar = self.dlogpdf_dlink_dvar(f, y, Y_metadata=Y_metadata)
        return dlogpdf_dlink_dvar

-    def d2logpdf_dlink2_dtheta(self, f, y, extra_data=None):
-        d2logpdf_dlink2_dvar = self.d2logpdf_dlink2_dvar(f, y, extra_data=extra_data)
+    def d2logpdf_dlink2_dtheta(self, f, y, Y_metadata=None):
+        d2logpdf_dlink2_dvar = self.d2logpdf_dlink2_dvar(f, y, Y_metadata=Y_metadata)
        return d2logpdf_dlink2_dvar

    def _mean(self, gp):
--- a/GPy/likelihoods/likelihood.py
+++ b/GPy/likelihoods/likelihood.py
@ -153,6 +153,10 @@ class Likelihood(Parameterized):

        return mean

+    def _conditional_mean(self, f):
+        """Conditional mean E(Y_star|f); not implemented here, override it to make predictions"""
+        raise NotImplementedError("implement this function to make predictions")
+
    def predictive_variance(self, mu,variance, predictive_mean=None, Y_metadata=None):
        """
        Numerical approximation to the predictive variance: V(Y_star)
@ -204,31 +208,31 @@ class Likelihood(Parameterized):
        # V(Y_star) = E[ V(Y_star|f_star) ] + E(Y_star**2|f_star) - E[Y_star|f_star]**2
        return exp_var + var_exp

-    def pdf_link(self, link_f, y, extra_data=None):
+    def pdf_link(self, link_f, y, Y_metadata=None):
        raise NotImplementedError

-    def logpdf_link(self, link_f, y, extra_data=None):
+    def logpdf_link(self, link_f, y, Y_metadata=None):
        raise NotImplementedError

-    def dlogpdf_dlink(self, link_f, y, extra_data=None):
+    def dlogpdf_dlink(self, link_f, y, Y_metadata=None):
        raise NotImplementedError

-    def d2logpdf_dlink2(self, link_f, y, extra_data=None):
+    def d2logpdf_dlink2(self, link_f, y, Y_metadata=None):
        raise NotImplementedError

-    def d3logpdf_dlink3(self, link_f, y, extra_data=None):
+    def d3logpdf_dlink3(self, link_f, y, Y_metadata=None):
        raise NotImplementedError

-    def dlogpdf_link_dtheta(self, link_f, y, extra_data=None):
+    def dlogpdf_link_dtheta(self, link_f, y, Y_metadata=None):
        raise NotImplementedError

-    def dlogpdf_dlink_dtheta(self, link_f, y, extra_data=None):
+    def dlogpdf_dlink_dtheta(self, link_f, y, Y_metadata=None):
        raise NotImplementedError

-    def d2logpdf_dlink2_dtheta(self, link_f, y, extra_data=None):
+    def d2logpdf_dlink2_dtheta(self, link_f, y, Y_metadata=None):
        raise NotImplementedError

-    def pdf(self, f, y, extra_data=None):
+    def pdf(self, f, y, Y_metadata=None):
        """
        Evaluates the link function link(f) then computes the likelihood (pdf) using it

@ -239,14 +243,14 @@ class Likelihood(Parameterized):
        :type f: Nx1 array
        :param y: data
        :type y: Nx1 array
-        :param extra_data: extra_data which is not used in student t distribution - not used
+        :param Y_metadata: Y_metadata, passed on unchanged to pdf_link
        :returns: likelihood evaluated for this point
        :rtype: float
        """
        link_f = self.gp_link.transf(f)
-        return self.pdf_link(link_f, y, extra_data=extra_data)
+        return self.pdf_link(link_f, y, Y_metadata=Y_metadata)

-    def logpdf(self, f, y, extra_data=None):
+    def logpdf(self, f, y, Y_metadata=None):
        """
        Evaluates the link function link(f) then computes the log likelihood (log pdf) using it

@ -257,14 +261,14 @@ class Likelihood(Parameterized):
        :type f: Nx1 array
        :param y: data
        :type y: Nx1 array
-        :param extra_data: extra_data which is not used in student t distribution - not used
+        :param Y_metadata: Y_metadata, passed on unchanged to logpdf_link
        :returns: log likelihood evaluated for this point
        :rtype: float
        """
        link_f = self.gp_link.transf(f)
-        return self.logpdf_link(link_f, y, extra_data=extra_data)
+        return self.logpdf_link(link_f, y, Y_metadata=Y_metadata)

-    def dlogpdf_df(self, f, y, extra_data=None):
+    def dlogpdf_df(self, f, y, Y_metadata=None):
        """
        Evaluates the link function link(f) then computes the derivative of log likelihood using it
        Uses the Faa di Bruno's formula for the chain rule
@ -276,16 +280,16 @@ class Likelihood(Parameterized):
        :type f: Nx1 array
        :param y: data
        :type y: Nx1 array
-        :param extra_data: extra_data which is not used in student t distribution - not used
+        :param Y_metadata: Y_metadata, passed on unchanged to dlogpdf_dlink
        :returns: derivative of log likelihood evaluated for this point
        :rtype: 1xN array
        """
        link_f = self.gp_link.transf(f)
-        dlogpdf_dlink = self.dlogpdf_dlink(link_f, y, extra_data=extra_data)
+        dlogpdf_dlink = self.dlogpdf_dlink(link_f, y, Y_metadata=Y_metadata)
        dlink_df = self.gp_link.dtransf_df(f)
        return chain_1(dlogpdf_dlink, dlink_df)

-    def d2logpdf_df2(self, f, y, extra_data=None):
+    def d2logpdf_df2(self, f, y, Y_metadata=None):
        """
        Evaluates the link function link(f) then computes the second derivative of log likelihood using it
        Uses the Faa di Bruno's formula for the chain rule
@ -297,18 +301,18 @@ class Likelihood(Parameterized):
        :type f: Nx1 array
        :param y: data
        :type y: Nx1 array
-        :param extra_data: extra_data which is not used in student t distribution - not used
+        :param Y_metadata: Y_metadata, passed on unchanged to d2logpdf_dlink2 and dlogpdf_dlink
        :returns: second derivative of log likelihood evaluated for this point (diagonal only)
        :rtype: 1xN array
        """
        link_f = self.gp_link.transf(f)
-        d2logpdf_dlink2 = self.d2logpdf_dlink2(link_f, y, extra_data=extra_data)
+        d2logpdf_dlink2 = self.d2logpdf_dlink2(link_f, y, Y_metadata=Y_metadata)
        dlink_df = self.gp_link.dtransf_df(f)
-        dlogpdf_dlink = self.dlogpdf_dlink(link_f, y, extra_data=extra_data)
+        dlogpdf_dlink = self.dlogpdf_dlink(link_f, y, Y_metadata=Y_metadata)
        d2link_df2 = self.gp_link.d2transf_df2(f)
        return chain_2(d2logpdf_dlink2, dlink_df, dlogpdf_dlink, d2link_df2)

-    def d3logpdf_df3(self, f, y, extra_data=None):
+    def d3logpdf_df3(self, f, y, Y_metadata=None):
        """
        Evaluates the link function link(f) then computes the third derivative of log likelihood using it
        Uses the Faa di Bruno's formula for the chain rule
@ -320,44 +324,44 @@ class Likelihood(Parameterized):
        :type f: Nx1 array
        :param y: data
        :type y: Nx1 array
-        :param extra_data: extra_data which is not used in student t distribution - not used
+        :param Y_metadata: Y_metadata, passed on unchanged to d3logpdf_dlink3, d2logpdf_dlink2 and dlogpdf_dlink
        :returns: third derivative of log likelihood evaluated for this point
        :rtype: float
        """
        link_f = self.gp_link.transf(f)
-        d3logpdf_dlink3 = self.d3logpdf_dlink3(link_f, y, extra_data=extra_data)
+        d3logpdf_dlink3 = self.d3logpdf_dlink3(link_f, y, Y_metadata=Y_metadata)
        dlink_df = self.gp_link.dtransf_df(f)
-        d2logpdf_dlink2 = self.d2logpdf_dlink2(link_f, y, extra_data=extra_data)
+        d2logpdf_dlink2 = self.d2logpdf_dlink2(link_f, y, Y_metadata=Y_metadata)
        d2link_df2 = self.gp_link.d2transf_df2(f)
-        dlogpdf_dlink = self.dlogpdf_dlink(link_f, y, extra_data=extra_data)
+        dlogpdf_dlink = self.dlogpdf_dlink(link_f, y, Y_metadata=Y_metadata)
        d3link_df3 = self.gp_link.d3transf_df3(f)
        return chain_3(d3logpdf_dlink3, dlink_df, d2logpdf_dlink2, d2link_df2, dlogpdf_dlink, d3link_df3)

-    def dlogpdf_dtheta(self, f, y, extra_data=None):
+    def dlogpdf_dtheta(self, f, y, Y_metadata=None):
        """
        TODO: Doc strings
        """
        if self.size > 0:
            link_f = self.gp_link.transf(f)
-            return self.dlogpdf_link_dtheta(link_f, y, extra_data=extra_data)
+            return self.dlogpdf_link_dtheta(link_f, y, Y_metadata=Y_metadata)
        else:
            #Is no parameters so return an empty array for its derivatives
            return np.zeros([1, 0])

-    def dlogpdf_df_dtheta(self, f, y, extra_data=None):
+    def dlogpdf_df_dtheta(self, f, y, Y_metadata=None):
        """
        TODO: Doc strings
        """
        if self.size > 0:
            link_f = self.gp_link.transf(f)
            dlink_df = self.gp_link.dtransf_df(f)
-            dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(link_f, y, extra_data=extra_data)
+            dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(link_f, y, Y_metadata=Y_metadata)
            return chain_1(dlogpdf_dlink_dtheta, dlink_df)
        else:
            #Is no parameters so return an empty array for its derivatives
            return np.zeros([f.shape[0], 0])

-    def d2logpdf_df2_dtheta(self, f, y, extra_data=None):
+    def d2logpdf_df2_dtheta(self, f, y, Y_metadata=None):
        """
        TODO: Doc strings
        """
@ -365,17 +369,17 @@ class Likelihood(Parameterized):
            link_f = self.gp_link.transf(f)
            dlink_df = self.gp_link.dtransf_df(f)
            d2link_df2 = self.gp_link.d2transf_df2(f)
-            d2logpdf_dlink2_dtheta = self.d2logpdf_dlink2_dtheta(link_f, y, extra_data=extra_data)
-            dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(link_f, y, extra_data=extra_data)
+            d2logpdf_dlink2_dtheta = self.d2logpdf_dlink2_dtheta(link_f, y, Y_metadata=Y_metadata)
+            dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(link_f, y, Y_metadata=Y_metadata)
            return chain_2(d2logpdf_dlink2_dtheta, dlink_df, dlogpdf_dlink_dtheta, d2link_df2)
        else:
            #Is no parameters so return an empty array for its derivatives
            return np.zeros([f.shape[0], 0])

-    def _laplace_gradients(self, f, y, extra_data=None):
-        dlogpdf_dtheta = self.dlogpdf_dtheta(f, y, extra_data=extra_data)
-        dlogpdf_df_dtheta = self.dlogpdf_df_dtheta(f, y, extra_data=extra_data)
-        d2logpdf_df2_dtheta = self.d2logpdf_df2_dtheta(f, y, extra_data=extra_data)
+    def _laplace_gradients(self, f, y, Y_metadata=None):
+        dlogpdf_dtheta = self.dlogpdf_dtheta(f, y, Y_metadata=Y_metadata)
+        dlogpdf_df_dtheta = self.dlogpdf_df_dtheta(f, y, Y_metadata=Y_metadata)
+        d2logpdf_df2_dtheta = self.d2logpdf_df2_dtheta(f, y, Y_metadata=Y_metadata)

        #Parameters are stacked vertically. Must be listed in same order as 'get_param_names'
        # ensure we have gradients for every parameter we want to optimize
@ -390,7 +394,7 @@ class Likelihood(Parameterized):

    def predictive_values(self, mu, var, full_cov=False, Y_metadata=None):
        """
-        Compute  mean, variance and conficence interval (percentiles 5 and 95) of the  prediction.
+        Compute the mean and variance of the predictive distribution.

        :param mu: mean of the latent variable, f, of posterior
        :param var: variance of the latent variable, f, of posterior
--- a/GPy/likelihoods/mixed_noise.py
+++ b/GPy/likelihoods/mixed_noise.py
@ -24,10 +24,11 @@ class MixedNoise(Likelihood):
        variance = np.zeros(ind.size)
        for lik, j in zip(self.likelihoods_list, range(len(self.likelihoods_list))):
            variance[ind==j] = lik.variance
-        return variance[:,None]
+        return variance

    def betaY(self,Y,Y_metadata):
-        return Y/self.gaussian_variance(Y_metadata=Y_metadata)
+        #TODO not here.
+        return Y/self.gaussian_variance(Y_metadata=Y_metadata)[:,None]

    def update_gradients(self, gradients):
        self.gradient = gradients
@ -60,10 +61,6 @@ class MixedNoise(Likelihood):
            Q[ind==j,:] = np.hstack(q)
        return [q[:,None] for q in Q.T]

-    def covariance_matrix(self, Y, Y_metadata):
-        #TODO make more general, to allow non-gaussian likelihoods
-        return np.diag(self.gaussian_variance(Y_metadata).flatten())
-
    def samples(self, gp, Y_metadata):
        """
        Returns a set of samples of observations based on a given value of the latent variable.
--- a/GPy/likelihoods/poisson.py
+++ b/GPy/likelihoods/poisson.py
@ -25,10 +25,13 @@ class Poisson(Likelihood):

        super(Poisson, self).__init__(gp_link, name='Poisson')

-    def _preprocess_values(self,Y):
-        return Y
+    def _conditional_mean(self, f):
+        """
+        The expected value of y given a value of the latent function f.
+        """
+        return self.gp_link.transf(f)

-    def pdf_link(self, link_f, y, extra_data=None):
+    def pdf_link(self, link_f, y, Y_metadata=None):
        """
        Likelihood function given link(f)

@ -39,14 +42,14 @@ class Poisson(Likelihood):
        :type link_f: Nx1 array
        :param y: data
        :type y: Nx1 array
-        :param extra_data: extra_data which is not used in poisson distribution
+        :param Y_metadata: Y_metadata which is not used in poisson distribution
        :returns: likelihood evaluated for this point
        :rtype: float
        """
        assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
        return np.prod(stats.poisson.pmf(y,link_f))

-    def logpdf_link(self, link_f, y, extra_data=None):
+    def logpdf_link(self, link_f, y, Y_metadata=None):
        """
        Log Likelihood Function given link(f)

@ -57,7 +60,7 @@ class Poisson(Likelihood):
        :type link_f: Nx1 array
        :param y: data
        :type y: Nx1 array
-        :param extra_data: extra_data which is not used in poisson distribution
+        :param Y_metadata: Y_metadata which is not used in poisson distribution
        :returns: likelihood evaluated for this point
        :rtype: float

@ -65,7 +68,7 @@ class Poisson(Likelihood):
        assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
        return np.sum(-link_f + y*np.log(link_f) - special.gammaln(y+1))

-    def dlogpdf_dlink(self, link_f, y, extra_data=None):
+    def dlogpdf_dlink(self, link_f, y, Y_metadata=None):
        """
        Gradient of the log likelihood function at y, given link(f) w.r.t link(f)

@ -76,7 +79,7 @@ class Poisson(Likelihood):
        :type link_f: Nx1 array
        :param y: data
        :type y: Nx1 array
-        :param extra_data: extra_data which is not used in poisson distribution
+        :param Y_metadata: Y_metadata which is not used in poisson distribution
        :returns: gradient of likelihood evaluated at points
        :rtype: Nx1 array

@ -84,7 +87,7 @@ class Poisson(Likelihood):
        assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
        return y/link_f - 1

-    def d2logpdf_dlink2(self, link_f, y, extra_data=None):
+    def d2logpdf_dlink2(self, link_f, y, Y_metadata=None):
        """
        Hessian at y, given link(f), w.r.t link(f)
        i.e. second derivative logpdf at y given link(f_i) and link(f_j)  w.r.t link(f_i) and link(f_j)
@ -97,7 +100,7 @@ class Poisson(Likelihood):
        :type link_f: Nx1 array
        :param y: data
        :type y: Nx1 array
-        :param extra_data: extra_data which is not used in poisson distribution
+        :param Y_metadata: Y_metadata which is not used in poisson distribution
        :returns: Diagonal of hessian matrix (second derivative of likelihood evaluated at points f)
        :rtype: Nx1 array

@ -112,7 +115,7 @@ class Poisson(Likelihood):
        #transf = self.gp_link.transf(gp)
        #return obs * ((self.gp_link.dtransf_df(gp)/transf)**2 - d2_df/transf) + d2_df

-    def d3logpdf_dlink3(self, link_f, y, extra_data=None):
+    def d3logpdf_dlink3(self, link_f, y, Y_metadata=None):
        """
        Third order derivative log-likelihood function at y given link(f) w.r.t link(f)

@ -123,7 +126,7 @@ class Poisson(Likelihood):
        :type link_f: Nx1 array
        :param y: data
        :type y: Nx1 array
-        :param extra_data: extra_data which is not used in poisson distribution
+        :param Y_metadata: Y_metadata which is not used in poisson distribution
        :returns: third derivative of likelihood evaluated at points f
        :rtype: Nx1 array
        """
--- a/GPy/likelihoods/student_t.py
+++ b/GPy/likelihoods/student_t.py
@ -43,10 +43,10 @@ class StudentT(Likelihood):
        Pull out the gradients, be careful as the order must match the order
        in which the parameters are added
        """
-        self.sigma2.gradient = grads[0]
-        self.v.gradient = grads[1]
+        self.sigma2.gradient = derivatives[0]
+        self.v.gradient = derivatives[1]

-    def pdf_link(self, link_f, y, extra_data=None):
+    def pdf_link(self, link_f, y, Y_metadata=None):
        """
        Likelihood function given link(f)

@ -57,7 +57,7 @@ class StudentT(Likelihood):
        :type link_f: Nx1 array
        :param y: data
        :type y: Nx1 array
-        :param extra_data: extra_data which is not used in student t distribution
+        :param Y_metadata: Y_metadata which is not used in student t distribution
        :returns: likelihood evaluated for this point
        :rtype: float
        """
@ -70,7 +70,7 @@ class StudentT(Likelihood):
                    )
        return np.prod(objective)

-    def logpdf_link(self, link_f, y, extra_data=None):
+    def logpdf_link(self, link_f, y, Y_metadata=None):
        """
        Log Likelihood Function given link(f)

@ -81,7 +81,7 @@ class StudentT(Likelihood):
        :type link_f: Nx1 array
        :param y: data
        :type y: Nx1 array
-        :param extra_data: extra_data which is not used in student t distribution
+        :param Y_metadata: Y_metadata which is not used in student t distribution
        :returns: likelihood evaluated for this point
        :rtype: float

@ -99,7 +99,7 @@ class StudentT(Likelihood):
                    )
        return np.sum(objective)

-    def dlogpdf_dlink(self, link_f, y, extra_data=None):
+    def dlogpdf_dlink(self, link_f, y, Y_metadata=None):
        """
        Gradient of the log likelihood function at y, given link(f) w.r.t link(f)

@ -110,7 +110,7 @@ class StudentT(Likelihood):
        :type link_f: Nx1 array
        :param y: data
        :type y: Nx1 array
-        :param extra_data: extra_data which is not used in student t distribution
+        :param Y_metadata: Y_metadata which is not used in student t distribution
        :returns: gradient of likelihood evaluated at points
        :rtype: Nx1 array

@ -120,7 +120,7 @@ class StudentT(Likelihood):
        grad = ((self.v + 1) * e) / (self.v * self.sigma2 + (e**2))
        return grad

-    def d2logpdf_dlink2(self, link_f, y, extra_data=None):
+    def d2logpdf_dlink2(self, link_f, y, Y_metadata=None):
        """
        Hessian at y, given link(f), w.r.t link(f)
        i.e. second derivative logpdf at y given link(f_i) and link(f_j)  w.r.t link(f_i) and link(f_j)
@ -133,7 +133,7 @@ class StudentT(Likelihood):
        :type link_f: Nx1 array
        :param y: data
        :type y: Nx1 array
-        :param extra_data: extra_data which is not used in student t distribution
+        :param Y_metadata: Y_metadata which is not used in student t distribution
        :returns: Diagonal of hessian matrix (second derivative of likelihood evaluated at points f)
        :rtype: Nx1 array

@ -146,7 +146,7 @@ class StudentT(Likelihood):
        hess = ((self.v + 1)*(e**2 - self.v*self.sigma2)) / ((self.sigma2*self.v + e**2)**2)
        return hess

-    def d3logpdf_dlink3(self, link_f, y, extra_data=None):
+    def d3logpdf_dlink3(self, link_f, y, Y_metadata=None):
        """
        Third order derivative log-likelihood function at y given link(f) w.r.t link(f)

@ -157,7 +157,7 @@ class StudentT(Likelihood):
        :type link_f: Nx1 array
        :param y: data
        :type y: Nx1 array
-        :param extra_data: extra_data which is not used in student t distribution
+        :param Y_metadata: Y_metadata which is not used in student t distribution
        :returns: third derivative of likelihood evaluated at points f
        :rtype: Nx1 array
        """
@ -168,7 +168,7 @@ class StudentT(Likelihood):
                    )
        return d3lik_dlink3

-    def dlogpdf_link_dvar(self, link_f, y, extra_data=None):
+    def dlogpdf_link_dvar(self, link_f, y, Y_metadata=None):
        """
        Gradient of the log-likelihood function at y given f, w.r.t variance parameter (t_noise)

@ -179,7 +179,7 @@ class StudentT(Likelihood):
        :type link_f: Nx1 array
        :param y: data
        :type y: Nx1 array
-        :param extra_data: extra_data which is not used in student t distribution
+        :param Y_metadata: Y_metadata which is not used in student t distribution
        :returns: derivative of likelihood evaluated at points f w.r.t variance parameter
        :rtype: float
        """
@ -188,7 +188,7 @@ class StudentT(Likelihood):
        dlogpdf_dvar = self.v*(e**2 - self.sigma2)/(2*self.sigma2*(self.sigma2*self.v + e**2))
        return np.sum(dlogpdf_dvar)

-    def dlogpdf_dlink_dvar(self, link_f, y, extra_data=None):
+    def dlogpdf_dlink_dvar(self, link_f, y, Y_metadata=None):
        """
        Derivative of the dlogpdf_dlink w.r.t variance parameter (t_noise)

@ -199,7 +199,7 @@ class StudentT(Likelihood):
        :type link_f: Nx1 array
        :param y: data
        :type y: Nx1 array
-        :param extra_data: extra_data which is not used in student t distribution
+        :param Y_metadata: Y_metadata which is not used in student t distribution
        :returns: derivative of likelihood evaluated at points f w.r.t variance parameter
        :rtype: Nx1 array
        """
@ -208,7 +208,7 @@ class StudentT(Likelihood):
        dlogpdf_dlink_dvar = (self.v*(self.v+1)*(-e))/((self.sigma2*self.v + e**2)**2)
        return dlogpdf_dlink_dvar

-    def d2logpdf_dlink2_dvar(self, link_f, y, extra_data=None):
+    def d2logpdf_dlink2_dvar(self, link_f, y, Y_metadata=None):
        """
        Gradient of the hessian (d2logpdf_dlink2) w.r.t variance parameter (t_noise)

@ -219,7 +219,7 @@ class StudentT(Likelihood):
        :type link_f: Nx1 array
        :param y: data
        :type y: Nx1 array
-        :param extra_data: extra_data which is not used in student t distribution
+        :param Y_metadata: Y_metadata which is not used in student t distribution
        :returns: derivative of hessian evaluated at points f and f_j w.r.t variance parameter
        :rtype: Nx1 array
        """
@ -230,25 +230,22 @@ class StudentT(Likelihood):
                           )
        return d2logpdf_dlink2_dvar

-    def dlogpdf_link_dtheta(self, f, y, extra_data=None):
-        dlogpdf_dvar = self.dlogpdf_link_dvar(f, y, extra_data=extra_data)
+    def dlogpdf_link_dtheta(self, f, y, Y_metadata=None):
+        dlogpdf_dvar = self.dlogpdf_link_dvar(f, y, Y_metadata=Y_metadata)
        dlogpdf_dv = np.zeros_like(dlogpdf_dvar) #FIXME: Not done yet
        return np.hstack((dlogpdf_dvar, dlogpdf_dv))

-    def dlogpdf_dlink_dtheta(self, f, y, extra_data=None):
-        dlogpdf_dlink_dvar = self.dlogpdf_dlink_dvar(f, y, extra_data=extra_data)
+    def dlogpdf_dlink_dtheta(self, f, y, Y_metadata=None):
+        dlogpdf_dlink_dvar = self.dlogpdf_dlink_dvar(f, y, Y_metadata=Y_metadata)
        dlogpdf_dlink_dv = np.zeros_like(dlogpdf_dlink_dvar) #FIXME: Not done yet
        return np.hstack((dlogpdf_dlink_dvar, dlogpdf_dlink_dv))

-    def d2logpdf_dlink2_dtheta(self, f, y, extra_data=None):
-        d2logpdf_dlink2_dvar = self.d2logpdf_dlink2_dvar(f, y, extra_data=extra_data)
+    def d2logpdf_dlink2_dtheta(self, f, y, Y_metadata=None):
+        d2logpdf_dlink2_dvar = self.d2logpdf_dlink2_dvar(f, y, Y_metadata=Y_metadata)
        d2logpdf_dlink2_dv = np.zeros_like(d2logpdf_dlink2_dvar) #FIXME: Not done yet
        return np.hstack((d2logpdf_dlink2_dvar, d2logpdf_dlink2_dv))

    def predictive_mean(self, mu, sigma, Y_metadata=None):
-        """
-        Compute mean of the prediction
-        """
        return self.gp_link.transf(mu) # only true in link is monotoci, which it is.

    def predictive_variance(self, mu,variance, predictive_mean=None, Y_metadata=None):