diff --git a/GPy/examples/__init__.py b/GPy/examples/__init__.py
index 2f74858a..c575bb33 100644
--- a/GPy/examples/__init__.py
+++ b/GPy/examples/__init__.py
@@ -6,3 +6,4 @@ import regression
 import dimensionality_reduction
 import tutorials
 import stochastic
+import non_gaussian
diff --git a/GPy/examples/non_gaussian.py b/GPy/examples/non_gaussian.py
index 2a5e0c42..1c3cab76 100644
--- a/GPy/examples/non_gaussian.py
+++ b/GPy/examples/non_gaussian.py
@@ -36,28 +36,28 @@ def student_t_approx(optimize=True, plot=True):
     edited_real_sd = initial_var_guess
 
     # Kernel object
-    kernel1 = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1])
-    kernel2 = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1])
-    kernel3 = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1])
-    kernel4 = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1])
+    kernel1 = GPy.kern.RBF(X.shape[1]) + GPy.kern.White(X.shape[1])
+    kernel2 = GPy.kern.RBF(X.shape[1]) + GPy.kern.White(X.shape[1])
+    kernel3 = GPy.kern.RBF(X.shape[1]) + GPy.kern.White(X.shape[1])
+    kernel4 = GPy.kern.RBF(X.shape[1]) + GPy.kern.White(X.shape[1])
 
     #Gaussian GP model on clean data
-    #m1 = GPy.models.GPRegression(X, Y.copy(), kernel=kernel1)
-    ## optimize
-    #m1['white'].constrain_fixed(1e-5)
-    #m1.randomize()
+    m1 = GPy.models.GPRegression(X, Y.copy(), kernel=kernel1)
+    # optimize
+    m1['.*white'].constrain_fixed(1e-5)
+    m1.randomize()
 
     ##Gaussian GP model on corrupt data
-    #m2 = GPy.models.GPRegression(X, Yc.copy(), kernel=kernel2)
-    #m1['white'].constrain_fixed(1e-5)
-    #m2.randomize()
+    m2 = GPy.models.GPRegression(X, Yc.copy(), kernel=kernel2)
+    m1['.*white'].constrain_fixed(1e-5)
+    m2.randomize()
 
     #Student t GP model on clean data
     t_distribution = GPy.likelihoods.StudentT(deg_free=deg_free, sigma2=edited_real_sd)
     laplace_inf = GPy.inference.latent_function_inference.Laplace()
     m3 = GPy.core.GP(X, Y.copy(), kernel3, likelihood=t_distribution, inference_method=laplace_inf)
-    m3['t_noise'].constrain_bounded(1e-6, 10.)
-    m3['white'].constrain_fixed(1e-5)
+    m3['.*t_noise'].constrain_bounded(1e-6, 10.)
+    m3['.*white'].constrain_fixed(1e-5)
     m3.randomize()
     debug = True
     print m3
@@ -69,8 +69,8 @@ def student_t_approx(optimize=True, plot=True):
     t_distribution = GPy.likelihoods.StudentT(deg_free=deg_free, sigma2=edited_real_sd)
     laplace_inf = GPy.inference.latent_function_inference.Laplace()
     m4 = GPy.core.GP(X, Yc.copy(), kernel4, likelihood=t_distribution, inference_method=laplace_inf)
-    m4['t_noise'].constrain_bounded(1e-6, 10.)
-    m4['white'].constrain_fixed(1e-5)
+    m4['.*t_noise'].constrain_bounded(1e-6, 10.)
+    m4['.*white'].constrain_fixed(1e-5)
     m4.randomize()
 
     if optimize:
@@ -153,7 +153,7 @@ def boston_example(optimize=True, plot=True):
         #Gaussian GP
         print "Gauss GP"
         mgp = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelgp.copy())
-        mgp.constrain_fixed('white', 1e-5)
+        mgp.constrain_fixed('.*white', 1e-5)
         mgp['rbf_len'] = rbf_len
         mgp['noise'] = noise
         print mgp
@@ -171,7 +171,7 @@ def boston_example(optimize=True, plot=True):
         g_likelihood = GPy.likelihoods.Laplace(Y_train.copy(), g_distribution)
         mg = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelstu.copy(), likelihood=g_likelihood)
         mg.constrain_positive('noise_variance')
-        mg.constrain_fixed('white', 1e-5)
+        mg.constrain_fixed('.*white', 1e-5)
         mg['rbf_len'] = rbf_len
         mg['noise'] = noise
         print mg
@@ -189,10 +189,10 @@ def boston_example(optimize=True, plot=True):
             t_distribution = GPy.likelihoods.noise_model_constructors.student_t(deg_free=df, sigma2=noise)
             stu_t_likelihood = GPy.likelihoods.Laplace(Y_train.copy(), t_distribution)
             mstu_t = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelstu.copy(), likelihood=stu_t_likelihood)
-            mstu_t.constrain_fixed('white', 1e-5)
-            mstu_t.constrain_bounded('t_noise', 0.0001, 1000)
+            mstu_t.constrain_fixed('.*white', 1e-5)
+            mstu_t.constrain_bounded('.*t_noise', 0.0001, 1000)
             mstu_t['rbf_len'] = rbf_len
-            mstu_t['t_noise'] = noise
+            mstu_t['.*t_noise'] = noise
             print mstu_t
             if optimize:
                 mstu_t.optimize(optimizer=optimizer, messages=messages)
diff --git a/GPy/inference/latent_function_inference/laplace.py b/GPy/inference/latent_function_inference/laplace.py
index 50a40449..96a47512 100644
--- a/GPy/inference/latent_function_inference/laplace.py
+++ b/GPy/inference/latent_function_inference/laplace.py
@@ -87,13 +87,13 @@ class Laplace(object):
 
         #define the objective function (to be maximised)
         def obj(Ki_f, f):
-            return -0.5*np.dot(Ki_f.flatten(), f.flatten()) + likelihood.logpdf(f, Y, extra_data=Y_metadata)
+            return -0.5*np.dot(Ki_f.flatten(), f.flatten()) + likelihood.logpdf(f, Y, Y_metadata=Y_metadata)
 
         difference = np.inf
         iteration = 0
         while difference > self._mode_finding_tolerance and iteration < self._mode_finding_max_iter:
-            W = -likelihood.d2logpdf_df2(f, Y, extra_data=Y_metadata)
-            grad = likelihood.dlogpdf_df(f, Y, extra_data=Y_metadata)
+            W = -likelihood.d2logpdf_df2(f, Y, Y_metadata=Y_metadata)
+            grad = likelihood.dlogpdf_df(f, Y, Y_metadata=Y_metadata)
 
             W_f = W*f
 
@@ -143,7 +143,7 @@ class Laplace(object):
                  dL_dthetaL : array of derivatives (1 x num_likelihood_params)
         """
         #At this point get the hessian matrix (or vector as W is diagonal)
-        W = -likelihood.d2logpdf_df2(f_hat, Y, extra_data=Y_metadata)
+        W = -likelihood.d2logpdf_df2(f_hat, Y, Y_metadata=Y_metadata)
 
         K_Wi_i, L, LiW12 = self._compute_B_statistics(K, W, likelihood.log_concave)
 
@@ -152,11 +152,11 @@ class Laplace(object):
         Ki_W_i  = K - C.T.dot(C)
 
         #compute the log marginal
-        log_marginal = -0.5*np.dot(Ki_f.flatten(), f_hat.flatten()) + likelihood.logpdf(f_hat, Y, extra_data=Y_metadata) - np.sum(np.log(np.diag(L)))
+        log_marginal = -0.5*np.dot(Ki_f.flatten(), f_hat.flatten()) + likelihood.logpdf(f_hat, Y, Y_metadata=Y_metadata) - np.sum(np.log(np.diag(L)))
 
         #Compute vival matrices for derivatives
-        dW_df = -likelihood.d3logpdf_df3(f_hat, Y, extra_data=Y_metadata) # -d3lik_d3fhat
-        woodbury_vector = likelihood.dlogpdf_df(f_hat, Y, extra_data=Y_metadata)
+        dW_df = -likelihood.d3logpdf_df3(f_hat, Y, Y_metadata=Y_metadata) # -d3lik_d3fhat
+        woodbury_vector = likelihood.dlogpdf_df(f_hat, Y, Y_metadata=Y_metadata)
         dL_dfhat = -0.5*(np.diag(Ki_W_i)[:, None]*dW_df) #why isn't this -0.5? s2 in R&W p126 line 9.
         #BiK, _ = dpotrs(L, K, lower=1)
         #dL_dfhat = 0.5*np.diag(BiK)[:, None]*dW_df
@@ -180,7 +180,7 @@ class Laplace(object):
         #compute dL_dthetaL#
         ####################
         if likelihood.size > 0 and not likelihood.is_fixed:
-            dlik_dthetaL, dlik_grad_dthetaL, dlik_hess_dthetaL = likelihood._laplace_gradients(f_hat, Y, extra_data=Y_metadata)
+            dlik_dthetaL, dlik_grad_dthetaL, dlik_hess_dthetaL = likelihood._laplace_gradients(f_hat, Y, Y_metadata=Y_metadata)
 
             num_params = likelihood.size
             # make space for one derivative for each likelihood parameter
diff --git a/GPy/likelihoods/bernoulli.py b/GPy/likelihoods/bernoulli.py
index 10df906d..7542f1bd 100644
--- a/GPy/likelihoods/bernoulli.py
+++ b/GPy/likelihoods/bernoulli.py
@@ -95,7 +95,7 @@ class Bernoulli(Likelihood):
             return np.nan
             #raise NotImplementedError
 
-    def pdf_link(self, link_f, y, extra_data=None):
+    def pdf_link(self, link_f, y, Y_metadata=None):
         """
         Likelihood function given link(f)
 
@@ -106,7 +106,7 @@ class Bernoulli(Likelihood):
         :type link_f: Nx1 array
         :param y: data
         :type y: Nx1 array
-        :param extra_data: extra_data not used in bernoulli
+        :param Y_metadata: Y_metadata not used in bernoulli
         :returns: likelihood evaluated for this point
         :rtype: float
 
@@ -118,7 +118,7 @@ class Bernoulli(Likelihood):
         objective = np.where(y, link_f, 1.-link_f)
         return np.exp(np.sum(np.log(objective)))
 
-    def logpdf_link(self, link_f, y, extra_data=None):
+    def logpdf_link(self, link_f, y, Y_metadata=None):
         """
         Log Likelihood function given link(f)
 
@@ -129,7 +129,7 @@ class Bernoulli(Likelihood):
         :type link_f: Nx1 array
         :param y: data
         :type y: Nx1 array
-        :param extra_data: extra_data not used in bernoulli
+        :param Y_metadata: Y_metadata not used in bernoulli
         :returns: log likelihood evaluated at points link(f)
         :rtype: float
         """
@@ -140,7 +140,7 @@ class Bernoulli(Likelihood):
         np.seterr(**state)
         return np.sum(objective)
 
-    def dlogpdf_dlink(self, link_f, y, extra_data=None):
+    def dlogpdf_dlink(self, link_f, y, Y_metadata=None):
         """
         Gradient of the pdf at y, given link(f) w.r.t link(f)
 
@@ -151,7 +151,7 @@ class Bernoulli(Likelihood):
         :type link_f: Nx1 array
         :param y: data
         :type y: Nx1 array
-        :param extra_data: extra_data not used in bernoulli
+        :param Y_metadata: Y_metadata not used in bernoulli
         :returns: gradient of log likelihood evaluated at points link(f)
         :rtype: Nx1 array
         """
@@ -162,7 +162,7 @@ class Bernoulli(Likelihood):
         np.seterr(**state)
         return grad
 
-    def d2logpdf_dlink2(self, link_f, y, extra_data=None):
+    def d2logpdf_dlink2(self, link_f, y, Y_metadata=None):
         """
         Hessian at y, given link_f, w.r.t link_f the hessian will be 0 unless i == j
         i.e. second derivative logpdf at y given link(f_i) link(f_j)  w.r.t link(f_i) and link(f_j)
@@ -175,7 +175,7 @@ class Bernoulli(Likelihood):
         :type link_f: Nx1 array
         :param y: data
         :type y: Nx1 array
-        :param extra_data: extra_data not used in bernoulli
+        :param Y_metadata: Y_metadata not used in bernoulli
         :returns: Diagonal of log hessian matrix (second derivative of log likelihood evaluated at points link(f))
         :rtype: Nx1 array
 
@@ -190,7 +190,7 @@ class Bernoulli(Likelihood):
         np.seterr(**state)
         return d2logpdf_dlink2
 
-    def d3logpdf_dlink3(self, link_f, y, extra_data=None):
+    def d3logpdf_dlink3(self, link_f, y, Y_metadata=None):
         """
         Third order derivative log-likelihood function at y given link(f) w.r.t link(f)
 
@@ -201,7 +201,7 @@ class Bernoulli(Likelihood):
         :type link_f: Nx1 array
         :param y: data
         :type y: Nx1 array
-        :param extra_data: extra_data not used in bernoulli
+        :param Y_metadata: Y_metadata not used in bernoulli
         :returns: third derivative of log likelihood evaluated at points link(f)
         :rtype: Nx1 array
         """
diff --git a/GPy/likelihoods/exponential.py b/GPy/likelihoods/exponential.py
index 8d2e8cdc..1dd548f6 100644
--- a/GPy/likelihoods/exponential.py
+++ b/GPy/likelihoods/exponential.py
@@ -18,13 +18,12 @@ class Exponential(Likelihood):
     L(x) = \exp(\lambda) * \lambda**Y_i / Y_i!
     $$
     """
-    def __init__(self,gp_link=None,analytical_mean=False,analytical_variance=False):
-        super(Exponential, self).__init__(gp_link,analytical_mean,analytical_variance)
+    def __init__(self,gp_link=None):
+        if gp_link is None:
+            gp_link = link_functions.Log()
+        super(Exponential, self).__init__(gp_link, 'ExpLikelihood')
 
-    def _preprocess_values(self,Y):
-        return Y
-
-    def pdf_link(self, link_f, y, extra_data=None):
+    def pdf_link(self, link_f, y, Y_metadata=None):
         """
         Likelihood function given link(f)
 
@@ -35,16 +34,15 @@ class Exponential(Likelihood):
         :type link_f: Nx1 array
         :param y: data
         :type y: Nx1 array
-        :param extra_data: extra_data which is not used in exponential distribution
+        :param Y_metadata: Y_metadata which is not used in exponential distribution
         :returns: likelihood evaluated for this point
         :rtype: float
         """
         assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
         log_objective = link_f*np.exp(-y*link_f)
         return np.exp(np.sum(np.log(log_objective)))
-        #return np.exp(np.sum(-y/link_f - np.log(link_f) ))
 
-    def logpdf_link(self, link_f, y, extra_data=None):
+    def logpdf_link(self, link_f, y, Y_metadata=None):
         """
         Log Likelihood Function given link(f)
 
@@ -55,17 +53,16 @@ class Exponential(Likelihood):
         :type link_f: Nx1 array
         :param y: data
         :type y: Nx1 array
-        :param extra_data: extra_data which is not used in exponential distribution
+        :param Y_metadata: Y_metadata which is not used in exponential distribution
         :returns: likelihood evaluated for this point
         :rtype: float
 
         """
         assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
         log_objective = np.log(link_f) - y*link_f
-        #logpdf_link = np.sum(-np.log(link_f) - y/link_f)
         return np.sum(log_objective)
 
-    def dlogpdf_dlink(self, link_f, y, extra_data=None):
+    def dlogpdf_dlink(self, link_f, y, Y_metadata=None):
         """
         Gradient of the log likelihood function at y, given link(f) w.r.t link(f)
 
@@ -76,7 +73,7 @@ class Exponential(Likelihood):
         :type link_f: Nx1 array
         :param y: data
         :type y: Nx1 array
-        :param extra_data: extra_data which is not used in exponential distribution
+        :param Y_metadata: Y_metadata which is not used in exponential distribution
         :returns: gradient of likelihood evaluated at points
         :rtype: Nx1 array
 
@@ -86,7 +83,7 @@ class Exponential(Likelihood):
         #grad = y/(link_f**2) - 1./link_f
         return grad
 
-    def d2logpdf_dlink2(self, link_f, y, extra_data=None):
+    def d2logpdf_dlink2(self, link_f, y, Y_metadata=None):
         """
         Hessian at y, given link(f), w.r.t link(f)
         i.e. second derivative logpdf at y given link(f_i) and link(f_j)  w.r.t link(f_i) and link(f_j)
@@ -99,7 +96,7 @@ class Exponential(Likelihood):
         :type link_f: Nx1 array
         :param y: data
         :type y: Nx1 array
-        :param extra_data: extra_data which is not used in exponential distribution
+        :param Y_metadata: Y_metadata which is not used in exponential distribution
         :returns: Diagonal of hessian matrix (second derivative of likelihood evaluated at points f)
         :rtype: Nx1 array
 
@@ -112,7 +109,7 @@ class Exponential(Likelihood):
         #hess = -2*y/(link_f**3) + 1/(link_f**2)
         return hess
 
-    def d3logpdf_dlink3(self, link_f, y, extra_data=None):
+    def d3logpdf_dlink3(self, link_f, y, Y_metadata=None):
         """
         Third order derivative log-likelihood function at y given link(f) w.r.t link(f)
 
@@ -123,7 +120,7 @@ class Exponential(Likelihood):
         :type link_f: Nx1 array
         :param y: data
         :type y: Nx1 array
-        :param extra_data: extra_data which is not used in exponential distribution
+        :param Y_metadata: Y_metadata which is not used in exponential distribution
         :returns: third derivative of likelihood evaluated at points f
         :rtype: Nx1 array
         """
@@ -132,18 +129,6 @@ class Exponential(Likelihood):
         #d3lik_dlink3 = 6*y/(link_f**4) - 2./(link_f**3)
         return d3lik_dlink3
 
-    def _mean(self,gp):
-        """
-        Mass (or density) function
-        """
-        return self.gp_link.transf(gp)
-
-    def _variance(self,gp):
-        """
-        Mass (or density) function
-        """
-        return self.gp_link.transf(gp)**2
-
     def samples(self, gp):
         """
         Returns a set of samples of observations based on a given value of the latent variable.
diff --git a/GPy/likelihoods/gamma.py b/GPy/likelihoods/gamma.py
index 0ac70a9f..a6436616 100644
--- a/GPy/likelihoods/gamma.py
+++ b/GPy/likelihoods/gamma.py
@@ -1,11 +1,12 @@
-# Copyright (c) 2012, 2013 Ricardo Andrade
+# Copyright (c) 2012 - 2014, GPy authors (see AUTHORS.txt).
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
 
 
 import numpy as np
 from scipy import stats,special
 import scipy as sp
-from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
+from ..util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
+from ..core.parameterization import Param
 import link_functions
 from likelihood import Likelihood
 
@@ -18,14 +19,16 @@ class Gamma(Likelihood):
         \\alpha_{i} = \\beta y_{i}
 
     """
-    def __init__(self,gp_link=None,analytical_mean=False,analytical_variance=False,beta=1.):
-        self.beta = beta
-        super(Gamma, self).__init__(gp_link,analytical_mean,analytical_variance)
+    def __init__(self,gp_link=None,beta=1.):
+        if gp_link is None:
+            gp_link = link_functions.Log()
+        super(Gamma, self).__init__(gp_link, 'Gamma')
 
-    def _preprocess_values(self,Y):
-        return Y
+        self.beta = Param('beta', beta)
+        self.add_parameter(self.beta)
+        self.beta.fix()#TODO: gradients!
 
-    def pdf_link(self, link_f, y, extra_data=None):
+    def pdf_link(self, link_f, y, Y_metadata=None):
         """
         Likelihood function given link(f)
 
@@ -37,7 +40,7 @@ class Gamma(Likelihood):
         :type link_f: Nx1 array
         :param y: data
         :type y: Nx1 array
-        :param extra_data: extra_data which is not used in poisson distribution
+        :param Y_metadata: Y_metadata which is not used in poisson distribution
         :returns: likelihood evaluated for this point
         :rtype: float
         """
@@ -47,7 +50,7 @@ class Gamma(Likelihood):
         objective = (y**(alpha - 1.) * np.exp(-self.beta*y) * self.beta**alpha)/ special.gamma(alpha)
         return np.exp(np.sum(np.log(objective)))
 
-    def logpdf_link(self, link_f, y, extra_data=None):
+    def logpdf_link(self, link_f, y, Y_metadata=None):
         """
         Log Likelihood Function given link(f)
 
@@ -59,7 +62,7 @@ class Gamma(Likelihood):
         :type link_f: Nx1 array
         :param y: data
         :type y: Nx1 array
-        :param extra_data: extra_data which is not used in poisson distribution
+        :param Y_metadata: Y_metadata which is not used in poisson distribution
         :returns: likelihood evaluated for this point
         :rtype: float
 
@@ -71,7 +74,7 @@ class Gamma(Likelihood):
         log_objective = alpha*np.log(self.beta) - np.log(special.gamma(alpha)) + (alpha - 1)*np.log(y) - self.beta*y
         return np.sum(log_objective)
 
-    def dlogpdf_dlink(self, link_f, y, extra_data=None):
+    def dlogpdf_dlink(self, link_f, y, Y_metadata=None):
         """
         Gradient of the log likelihood function at y, given link(f) w.r.t link(f)
 
@@ -83,7 +86,7 @@ class Gamma(Likelihood):
         :type link_f: Nx1 array
         :param y: data
         :type y: Nx1 array
-        :param extra_data: extra_data which is not used in gamma distribution
+        :param Y_metadata: Y_metadata which is not used in gamma distribution
         :returns: gradient of likelihood evaluated at points
         :rtype: Nx1 array
 
@@ -94,7 +97,7 @@ class Gamma(Likelihood):
         #return -self.gp_link.dtransf_df(gp)*self.beta*np.log(obs) + special.psi(self.gp_link.transf(gp)*self.beta) * self.gp_link.dtransf_df(gp)*self.beta
         return grad
 
-    def d2logpdf_dlink2(self, link_f, y, extra_data=None):
+    def d2logpdf_dlink2(self, link_f, y, Y_metadata=None):
         """
         Hessian at y, given link(f), w.r.t link(f)
         i.e. second derivative logpdf at y given link(f_i) and link(f_j)  w.r.t link(f_i) and link(f_j)
@@ -108,7 +111,7 @@ class Gamma(Likelihood):
         :type link_f: Nx1 array
         :param y: data
         :type y: Nx1 array
-        :param extra_data: extra_data which is not used in gamma distribution
+        :param Y_metadata: Y_metadata which is not used in gamma distribution
         :returns: Diagonal of hessian matrix (second derivative of likelihood evaluated at points f)
         :rtype: Nx1 array
 
@@ -122,7 +125,7 @@ class Gamma(Likelihood):
         #return -self.gp_link.d2transf_df2(gp)*self.beta*np.log(obs) + special.polygamma(1,self.gp_link.transf(gp)*self.beta)*(self.gp_link.dtransf_df(gp)*self.beta)**2 + special.psi(self.gp_link.transf(gp)*self.beta)*self.gp_link.d2transf_df2(gp)*self.beta
         return hess
 
-    def d3logpdf_dlink3(self, link_f, y, extra_data=None):
+    def d3logpdf_dlink3(self, link_f, y, Y_metadata=None):
         """
         Third order derivative log-likelihood function at y given link(f) w.r.t link(f)
 
@@ -134,22 +137,10 @@ class Gamma(Likelihood):
         :type link_f: Nx1 array
         :param y: data
         :type y: Nx1 array
-        :param extra_data: extra_data which is not used in gamma distribution
+        :param Y_metadata: Y_metadata which is not used in gamma distribution
         :returns: third derivative of likelihood evaluated at points f
         :rtype: Nx1 array
         """
         assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
         d3lik_dlink3 = -special.polygamma(2, self.beta*link_f)*(self.beta**3)
         return d3lik_dlink3
-
-    def _mean(self,gp):
-        """
-        Mass (or density) function
-        """
-        return self.gp_link.transf(gp)
-
-    def _variance(self,gp):
-        """
-        Mass (or density) function
-        """
-        return self.gp_link.transf(gp)/self.beta
diff --git a/GPy/likelihoods/gaussian.py b/GPy/likelihoods/gaussian.py
index b82750ac..214db738 100644
--- a/GPy/likelihoods/gaussian.py
+++ b/GPy/likelihoods/gaussian.py
@@ -93,7 +93,7 @@ class Gaussian(Likelihood):
     def predictive_variance(self, mu, sigma, predictive_mean=None):
         return self.variance + sigma**2
 
-    def pdf_link(self, link_f, y, extra_data=None):
+    def pdf_link(self, link_f, y, Y_metadata=None):
         """
         Likelihood function given link(f)
 
@@ -104,14 +104,14 @@ class Gaussian(Likelihood):
         :type link_f: Nx1 array
         :param y: data
         :type y: Nx1 array
-        :param extra_data: extra_data not used in gaussian
+        :param Y_metadata: Y_metadata not used in gaussian
         :returns: likelihood evaluated for this point
         :rtype: float
         """
         #Assumes no covariance, exp, sum, log for numerical stability
         return np.exp(np.sum(np.log(stats.norm.pdf(y, link_f, np.sqrt(self.variance)))))
 
-    def logpdf_link(self, link_f, y, extra_data=None):
+    def logpdf_link(self, link_f, y, Y_metadata=None):
         """
         Log likelihood function given link(f)
 
@@ -122,7 +122,7 @@ class Gaussian(Likelihood):
         :type link_f: Nx1 array
         :param y: data
         :type y: Nx1 array
-        :param extra_data: extra_data not used in gaussian
+        :param Y_metadata: Y_metadata not used in gaussian
         :returns: log likelihood evaluated for this point
         :rtype: float
         """
@@ -132,7 +132,7 @@ class Gaussian(Likelihood):
 
         return -0.5*(np.sum((y-link_f)**2/self.variance) + ln_det_cov + N*np.log(2.*np.pi))
 
-    def dlogpdf_dlink(self, link_f, y, extra_data=None):
+    def dlogpdf_dlink(self, link_f, y, Y_metadata=None):
         """
         Gradient of the pdf at y, given link(f) w.r.t link(f)
 
@@ -143,7 +143,7 @@ class Gaussian(Likelihood):
         :type link_f: Nx1 array
         :param y: data
         :type y: Nx1 array
-        :param extra_data: extra_data not used in gaussian
+        :param Y_metadata: Y_metadata not used in gaussian
         :returns: gradient of log likelihood evaluated at points link(f)
         :rtype: Nx1 array
         """
@@ -152,7 +152,7 @@ class Gaussian(Likelihood):
         grad = s2_i*y - s2_i*link_f
         return grad
 
-    def d2logpdf_dlink2(self, link_f, y, extra_data=None):
+    def d2logpdf_dlink2(self, link_f, y, Y_metadata=None):
         """
         Hessian at y, given link_f, w.r.t link_f.
         i.e. second derivative logpdf at y given link(f_i) link(f_j)  w.r.t link(f_i) and link(f_j)
@@ -166,7 +166,7 @@ class Gaussian(Likelihood):
         :type link_f: Nx1 array
         :param y: data
         :type y: Nx1 array
-        :param extra_data: extra_data not used in gaussian
+        :param Y_metadata: Y_metadata not used in gaussian
         :returns: Diagonal of log hessian matrix (second derivative of log likelihood evaluated at points link(f))
         :rtype: Nx1 array
 
@@ -179,7 +179,7 @@ class Gaussian(Likelihood):
         hess = -(1.0/self.variance)*np.ones((N, 1))
         return hess
 
-    def d3logpdf_dlink3(self, link_f, y, extra_data=None):
+    def d3logpdf_dlink3(self, link_f, y, Y_metadata=None):
         """
         Third order derivative log-likelihood function at y given link(f) w.r.t link(f)
 
@@ -190,7 +190,7 @@ class Gaussian(Likelihood):
         :type link_f: Nx1 array
         :param y: data
         :type y: Nx1 array
-        :param extra_data: extra_data not used in gaussian
+        :param Y_metadata: Y_metadata not used in gaussian
         :returns: third derivative of log likelihood evaluated at points link(f)
         :rtype: Nx1 array
         """
@@ -199,7 +199,7 @@ class Gaussian(Likelihood):
         d3logpdf_dlink3 = np.zeros((N,1))
         return d3logpdf_dlink3
 
-    def dlogpdf_link_dvar(self, link_f, y, extra_data=None):
+    def dlogpdf_link_dvar(self, link_f, y, Y_metadata=None):
         """
         Gradient of the log-likelihood function at y given link(f), w.r.t variance parameter (noise_variance)
 
@@ -210,7 +210,7 @@ class Gaussian(Likelihood):
         :type link_f: Nx1 array
         :param y: data
         :type y: Nx1 array
-        :param extra_data: extra_data not used in gaussian
+        :param Y_metadata: Y_metadata not used in gaussian
         :returns: derivative of log likelihood evaluated at points link(f) w.r.t variance parameter
         :rtype: float
         """
@@ -221,7 +221,7 @@ class Gaussian(Likelihood):
         dlik_dsigma = -0.5*N/self.variance + 0.5*s_4*np.sum(np.square(e))
         return np.sum(dlik_dsigma) # Sure about this sum?
 
-    def dlogpdf_dlink_dvar(self, link_f, y, extra_data=None):
+    def dlogpdf_dlink_dvar(self, link_f, y, Y_metadata=None):
         """
         Derivative of the dlogpdf_dlink w.r.t variance parameter (noise_variance)
 
@@ -232,7 +232,7 @@ class Gaussian(Likelihood):
         :type link_f: Nx1 array
         :param y: data
         :type y: Nx1 array
-        :param extra_data: extra_data not used in gaussian
+        :param Y_metadata: Y_metadata not used in gaussian
         :returns: derivative of log likelihood evaluated at points link(f) w.r.t variance parameter
         :rtype: Nx1 array
         """
@@ -241,7 +241,7 @@ class Gaussian(Likelihood):
         dlik_grad_dsigma = -s_4*y + s_4*link_f
         return dlik_grad_dsigma
 
-    def d2logpdf_dlink2_dvar(self, link_f, y, extra_data=None):
+    def d2logpdf_dlink2_dvar(self, link_f, y, Y_metadata=None):
         """
         Gradient of the hessian (d2logpdf_dlink2) w.r.t variance parameter (noise_variance)
 
@@ -252,7 +252,7 @@ class Gaussian(Likelihood):
         :type link_f: Nx1 array
         :param y: data
         :type y: Nx1 array
-        :param extra_data: extra_data not used in gaussian
+        :param Y_metadata: Y_metadata not used in gaussian
         :returns: derivative of log hessian evaluated at points link(f_i) and link(f_j) w.r.t variance parameter
         :rtype: Nx1 array
         """
@@ -262,16 +262,16 @@ class Gaussian(Likelihood):
         d2logpdf_dlink2_dvar = np.ones((N,1))*s_4
         return d2logpdf_dlink2_dvar
 
-    def dlogpdf_link_dtheta(self, f, y, extra_data=None):
-        dlogpdf_dvar = self.dlogpdf_link_dvar(f, y, extra_data=extra_data)
+    def dlogpdf_link_dtheta(self, f, y, Y_metadata=None):
+        dlogpdf_dvar = self.dlogpdf_link_dvar(f, y, Y_metadata=Y_metadata)
         return np.asarray([[dlogpdf_dvar]])
 
-    def dlogpdf_dlink_dtheta(self, f, y, extra_data=None):
-        dlogpdf_dlink_dvar = self.dlogpdf_dlink_dvar(f, y, extra_data=extra_data)
+    def dlogpdf_dlink_dtheta(self, f, y, Y_metadata=None):
+        dlogpdf_dlink_dvar = self.dlogpdf_dlink_dvar(f, y, Y_metadata=Y_metadata)
         return dlogpdf_dlink_dvar
 
-    def d2logpdf_dlink2_dtheta(self, f, y, extra_data=None):
-        d2logpdf_dlink2_dvar = self.d2logpdf_dlink2_dvar(f, y, extra_data=extra_data)
+    def d2logpdf_dlink2_dtheta(self, f, y, Y_metadata=None):
+        d2logpdf_dlink2_dvar = self.d2logpdf_dlink2_dvar(f, y, Y_metadata=Y_metadata)
         return d2logpdf_dlink2_dvar
 
     def _mean(self, gp):
diff --git a/GPy/likelihoods/likelihood.py b/GPy/likelihoods/likelihood.py
index 701a5a2f..070875f5 100644
--- a/GPy/likelihoods/likelihood.py
+++ b/GPy/likelihoods/likelihood.py
@@ -174,31 +174,31 @@ class Likelihood(Parameterized):
         # V(Y_star) = E( V(Y_star|f_star) ) + V( E(Y_star|f_star) )
         return exp_var + var_exp
 
-    def pdf_link(self, link_f, y, extra_data=None):
+    def pdf_link(self, link_f, y, Y_metadata=None):
         raise NotImplementedError
 
-    def logpdf_link(self, link_f, y, extra_data=None):
+    def logpdf_link(self, link_f, y, Y_metadata=None):
         raise NotImplementedError
 
-    def dlogpdf_dlink(self, link_f, y, extra_data=None):
+    def dlogpdf_dlink(self, link_f, y, Y_metadata=None):
         raise NotImplementedError
 
-    def d2logpdf_dlink2(self, link_f, y, extra_data=None):
+    def d2logpdf_dlink2(self, link_f, y, Y_metadata=None):
         raise NotImplementedError
 
-    def d3logpdf_dlink3(self, link_f, y, extra_data=None):
+    def d3logpdf_dlink3(self, link_f, y, Y_metadata=None):
         raise NotImplementedError
 
-    def dlogpdf_link_dtheta(self, link_f, y, extra_data=None):
+    def dlogpdf_link_dtheta(self, link_f, y, Y_metadata=None):
         raise NotImplementedError
 
-    def dlogpdf_dlink_dtheta(self, link_f, y, extra_data=None):
+    def dlogpdf_dlink_dtheta(self, link_f, y, Y_metadata=None):
         raise NotImplementedError
 
-    def d2logpdf_dlink2_dtheta(self, link_f, y, extra_data=None):
+    def d2logpdf_dlink2_dtheta(self, link_f, y, Y_metadata=None):
         raise NotImplementedError
 
-    def pdf(self, f, y, extra_data=None):
+    def pdf(self, f, y, Y_metadata=None):
         """
         Evaluates the link function link(f) then computes the likelihood (pdf) using it
 
@@ -209,14 +209,14 @@ class Likelihood(Parameterized):
         :type f: Nx1 array
         :param y: data
         :type y: Nx1 array
-        :param extra_data: extra_data which is not used in student t distribution - not used
+        :param Y_metadata: Y_metadata which is not used in student t distribution - not used
         :returns: likelihood evaluated for this point
         :rtype: float
         """
         link_f = self.gp_link.transf(f)
-        return self.pdf_link(link_f, y, extra_data=extra_data)
+        return self.pdf_link(link_f, y, Y_metadata=Y_metadata)
 
-    def logpdf(self, f, y, extra_data=None):
+    def logpdf(self, f, y, Y_metadata=None):
         """
         Evaluates the link function link(f) then computes the log likelihood (log pdf) using it
 
@@ -227,14 +227,14 @@ class Likelihood(Parameterized):
         :type f: Nx1 array
         :param y: data
         :type y: Nx1 array
-        :param extra_data: extra_data which is not used in student t distribution - not used
+        :param Y_metadata: Y_metadata which is not used in student t distribution - not used
         :returns: log likelihood evaluated for this point
         :rtype: float
         """
         link_f = self.gp_link.transf(f)
-        return self.logpdf_link(link_f, y, extra_data=extra_data)
+        return self.logpdf_link(link_f, y, Y_metadata=Y_metadata)
 
-    def dlogpdf_df(self, f, y, extra_data=None):
+    def dlogpdf_df(self, f, y, Y_metadata=None):
         """
         Evaluates the link function link(f) then computes the derivative of log likelihood using it
         Uses the Faa di Bruno's formula for the chain rule
@@ -246,16 +246,16 @@ class Likelihood(Parameterized):
         :type f: Nx1 array
         :param y: data
         :type y: Nx1 array
-        :param extra_data: extra_data which is not used in student t distribution - not used
+        :param Y_metadata: Y_metadata which is not used in student t distribution - not used
         :returns: derivative of log likelihood evaluated for this point
         :rtype: 1xN array
         """
         link_f = self.gp_link.transf(f)
-        dlogpdf_dlink = self.dlogpdf_dlink(link_f, y, extra_data=extra_data)
+        dlogpdf_dlink = self.dlogpdf_dlink(link_f, y, Y_metadata=Y_metadata)
         dlink_df = self.gp_link.dtransf_df(f)
         return chain_1(dlogpdf_dlink, dlink_df)
 
-    def d2logpdf_df2(self, f, y, extra_data=None):
+    def d2logpdf_df2(self, f, y, Y_metadata=None):
         """
         Evaluates the link function link(f) then computes the second derivative of log likelihood using it
         Uses the Faa di Bruno's formula for the chain rule
@@ -267,18 +267,18 @@ class Likelihood(Parameterized):
         :type f: Nx1 array
         :param y: data
         :type y: Nx1 array
-        :param extra_data: extra_data which is not used in student t distribution - not used
+        :param Y_metadata: Y_metadata which is not used in student t distribution - not used
         :returns: second derivative of log likelihood evaluated for this point (diagonal only)
         :rtype: 1xN array
         """
         link_f = self.gp_link.transf(f)
-        d2logpdf_dlink2 = self.d2logpdf_dlink2(link_f, y, extra_data=extra_data)
+        d2logpdf_dlink2 = self.d2logpdf_dlink2(link_f, y, Y_metadata=Y_metadata)
         dlink_df = self.gp_link.dtransf_df(f)
-        dlogpdf_dlink = self.dlogpdf_dlink(link_f, y, extra_data=extra_data)
+        dlogpdf_dlink = self.dlogpdf_dlink(link_f, y, Y_metadata=Y_metadata)
         d2link_df2 = self.gp_link.d2transf_df2(f)
         return chain_2(d2logpdf_dlink2, dlink_df, dlogpdf_dlink, d2link_df2)
 
-    def d3logpdf_df3(self, f, y, extra_data=None):
+    def d3logpdf_df3(self, f, y, Y_metadata=None):
         """
         Evaluates the link function link(f) then computes the third derivative of log likelihood using it
         Uses the Faa di Bruno's formula for the chain rule
@@ -290,44 +290,44 @@ class Likelihood(Parameterized):
         :type f: Nx1 array
         :param y: data
         :type y: Nx1 array
-        :param extra_data: extra_data which is not used in student t distribution - not used
+        :param Y_metadata: Y_metadata which is not used in student t distribution - not used
         :returns: third derivative of log likelihood evaluated for this point
         :rtype: float
         """
         link_f = self.gp_link.transf(f)
-        d3logpdf_dlink3 = self.d3logpdf_dlink3(link_f, y, extra_data=extra_data)
+        d3logpdf_dlink3 = self.d3logpdf_dlink3(link_f, y, Y_metadata=Y_metadata)
         dlink_df = self.gp_link.dtransf_df(f)
-        d2logpdf_dlink2 = self.d2logpdf_dlink2(link_f, y, extra_data=extra_data)
+        d2logpdf_dlink2 = self.d2logpdf_dlink2(link_f, y, Y_metadata=Y_metadata)
         d2link_df2 = self.gp_link.d2transf_df2(f)
-        dlogpdf_dlink = self.dlogpdf_dlink(link_f, y, extra_data=extra_data)
+        dlogpdf_dlink = self.dlogpdf_dlink(link_f, y, Y_metadata=Y_metadata)
         d3link_df3 = self.gp_link.d3transf_df3(f)
         return chain_3(d3logpdf_dlink3, dlink_df, d2logpdf_dlink2, d2link_df2, dlogpdf_dlink, d3link_df3)
 
-    def dlogpdf_dtheta(self, f, y, extra_data=None):
+    def dlogpdf_dtheta(self, f, y, Y_metadata=None):
         """
         TODO: Doc strings
         """
         if self.size > 0:
             link_f = self.gp_link.transf(f)
-            return self.dlogpdf_link_dtheta(link_f, y, extra_data=extra_data)
+            return self.dlogpdf_link_dtheta(link_f, y, Y_metadata=Y_metadata)
         else:
             #Is no parameters so return an empty array for its derivatives
             return np.zeros([1, 0])
 
-    def dlogpdf_df_dtheta(self, f, y, extra_data=None):
+    def dlogpdf_df_dtheta(self, f, y, Y_metadata=None):
         """
         TODO: Doc strings
         """
         if self.size > 0:
             link_f = self.gp_link.transf(f)
             dlink_df = self.gp_link.dtransf_df(f)
-            dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(link_f, y, extra_data=extra_data)
+            dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(link_f, y, Y_metadata=Y_metadata)
             return chain_1(dlogpdf_dlink_dtheta, dlink_df)
         else:
             #Is no parameters so return an empty array for its derivatives
             return np.zeros([f.shape[0], 0])
 
-    def d2logpdf_df2_dtheta(self, f, y, extra_data=None):
+    def d2logpdf_df2_dtheta(self, f, y, Y_metadata=None):
         """
         TODO: Doc strings
         """
@@ -335,17 +335,17 @@ class Likelihood(Parameterized):
             link_f = self.gp_link.transf(f)
             dlink_df = self.gp_link.dtransf_df(f)
             d2link_df2 = self.gp_link.d2transf_df2(f)
-            d2logpdf_dlink2_dtheta = self.d2logpdf_dlink2_dtheta(link_f, y, extra_data=extra_data)
-            dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(link_f, y, extra_data=extra_data)
+            d2logpdf_dlink2_dtheta = self.d2logpdf_dlink2_dtheta(link_f, y, Y_metadata=Y_metadata)
+            dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(link_f, y, Y_metadata=Y_metadata)
             return chain_2(d2logpdf_dlink2_dtheta, dlink_df, dlogpdf_dlink_dtheta, d2link_df2)
         else:
             #Is no parameters so return an empty array for its derivatives
             return np.zeros([f.shape[0], 0])
 
-    def _laplace_gradients(self, f, y, extra_data=None):
-        dlogpdf_dtheta = self.dlogpdf_dtheta(f, y, extra_data=extra_data)
-        dlogpdf_df_dtheta = self.dlogpdf_df_dtheta(f, y, extra_data=extra_data)
-        d2logpdf_df2_dtheta = self.d2logpdf_df2_dtheta(f, y, extra_data=extra_data)
+    def _laplace_gradients(self, f, y, Y_metadata=None):
+        dlogpdf_dtheta = self.dlogpdf_dtheta(f, y, Y_metadata=Y_metadata)
+        dlogpdf_df_dtheta = self.dlogpdf_df_dtheta(f, y, Y_metadata=Y_metadata)
+        d2logpdf_df2_dtheta = self.d2logpdf_df2_dtheta(f, y, Y_metadata=Y_metadata)
 
         #Parameters are stacked vertically. Must be listed in same order as 'get_param_names'
         # ensure we have gradients for every parameter we want to optimize
diff --git a/GPy/likelihoods/poisson.py b/GPy/likelihoods/poisson.py
index ba6915b8..616447d9 100644
--- a/GPy/likelihoods/poisson.py
+++ b/GPy/likelihoods/poisson.py
@@ -28,7 +28,7 @@ class Poisson(Likelihood):
     def _preprocess_values(self,Y):
         return Y
 
-    def pdf_link(self, link_f, y, extra_data=None):
+    def pdf_link(self, link_f, y, Y_metadata=None):
         """
         Likelihood function given link(f)
 
@@ -39,14 +39,14 @@ class Poisson(Likelihood):
         :type link_f: Nx1 array
         :param y: data
         :type y: Nx1 array
-        :param extra_data: extra_data which is not used in poisson distribution
+        :param Y_metadata: Y_metadata which is not used in poisson distribution
         :returns: likelihood evaluated for this point
         :rtype: float
         """
         assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
         return np.prod(stats.poisson.pmf(y,link_f))
 
-    def logpdf_link(self, link_f, y, extra_data=None):
+    def logpdf_link(self, link_f, y, Y_metadata=None):
         """
         Log Likelihood Function given link(f)
 
@@ -57,7 +57,7 @@ class Poisson(Likelihood):
         :type link_f: Nx1 array
         :param y: data
         :type y: Nx1 array
-        :param extra_data: extra_data which is not used in poisson distribution
+        :param Y_metadata: Y_metadata which is not used in poisson distribution
         :returns: likelihood evaluated for this point
         :rtype: float
 
@@ -65,7 +65,7 @@ class Poisson(Likelihood):
         assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
         return np.sum(-link_f + y*np.log(link_f) - special.gammaln(y+1))
 
-    def dlogpdf_dlink(self, link_f, y, extra_data=None):
+    def dlogpdf_dlink(self, link_f, y, Y_metadata=None):
         """
         Gradient of the log likelihood function at y, given link(f) w.r.t link(f)
 
@@ -76,7 +76,7 @@ class Poisson(Likelihood):
         :type link_f: Nx1 array
         :param y: data
         :type y: Nx1 array
-        :param extra_data: extra_data which is not used in poisson distribution
+        :param Y_metadata: Y_metadata which is not used in poisson distribution
         :returns: gradient of likelihood evaluated at points
         :rtype: Nx1 array
 
@@ -84,7 +84,7 @@ class Poisson(Likelihood):
         assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
         return y/link_f - 1
 
-    def d2logpdf_dlink2(self, link_f, y, extra_data=None):
+    def d2logpdf_dlink2(self, link_f, y, Y_metadata=None):
         """
         Hessian at y, given link(f), w.r.t link(f)
         i.e. second derivative logpdf at y given link(f_i) and link(f_j)  w.r.t link(f_i) and link(f_j)
@@ -97,7 +97,7 @@ class Poisson(Likelihood):
         :type link_f: Nx1 array
         :param y: data
         :type y: Nx1 array
-        :param extra_data: extra_data which is not used in poisson distribution
+        :param Y_metadata: Y_metadata which is not used in poisson distribution
         :returns: Diagonal of hessian matrix (second derivative of likelihood evaluated at points f)
         :rtype: Nx1 array
 
@@ -112,7 +112,7 @@ class Poisson(Likelihood):
         #transf = self.gp_link.transf(gp)
         #return obs * ((self.gp_link.dtransf_df(gp)/transf)**2 - d2_df/transf) + d2_df
 
-    def d3logpdf_dlink3(self, link_f, y, extra_data=None):
+    def d3logpdf_dlink3(self, link_f, y, Y_metadata=None):
         """
         Third order derivative log-likelihood function at y given link(f) w.r.t link(f)
 
@@ -123,7 +123,7 @@ class Poisson(Likelihood):
         :type link_f: Nx1 array
         :param y: data
         :type y: Nx1 array
-        :param extra_data: extra_data which is not used in poisson distribution
+        :param Y_metadata: Y_metadata which is not used in poisson distribution
         :returns: third derivative of likelihood evaluated at points f
         :rtype: Nx1 array
         """
@@ -131,18 +131,6 @@ class Poisson(Likelihood):
         d3lik_dlink3 = 2*y/(link_f)**3
         return d3lik_dlink3
 
-    def _mean(self,gp):
-        """
-        Mass (or density) function
-        """
-        return self.gp_link.transf(gp)
-
-    def _variance(self,gp):
-        """
-        Mass (or density) function
-        """
-        return self.gp_link.transf(gp)
-
     def samples(self, gp):
         """
         Returns a set of samples of observations based on a given value of the latent variable.
diff --git a/GPy/likelihoods/student_t.py b/GPy/likelihoods/student_t.py
index ac93f204..565313cc 100644
--- a/GPy/likelihoods/student_t.py
+++ b/GPy/likelihoods/student_t.py
@@ -37,15 +37,15 @@ class StudentT(Likelihood):
     def parameters_changed(self):
         self.variance = (self.v / float(self.v - 2)) * self.sigma2
 
-    def update_gradients(self, partial):
+    def update_gradients(self, derivatives):
         """
         Pull out the gradients, be careful as the order must match the order
         in which the parameters are added
         """
-        self.sigma2.gradient = partial[0]
-        self.v.gradient = partial[1]
+        self.sigma2.gradient = derivatives[0]
+        self.v.gradient = derivatives[1]
 
-    def pdf_link(self, link_f, y, extra_data=None):
+    def pdf_link(self, link_f, y, Y_metadata=None):
         """
         Likelihood function given link(f)
 
@@ -56,7 +56,7 @@ class StudentT(Likelihood):
         :type link_f: Nx1 array
         :param y: data
         :type y: Nx1 array
-        :param extra_data: extra_data which is not used in student t distribution
+        :param Y_metadata: Y_metadata which is not used in student t distribution
         :returns: likelihood evaluated for this point
         :rtype: float
         """
@@ -69,7 +69,7 @@ class StudentT(Likelihood):
                     )
         return np.prod(objective)
 
-    def logpdf_link(self, link_f, y, extra_data=None):
+    def logpdf_link(self, link_f, y, Y_metadata=None):
         """
         Log Likelihood Function given link(f)
 
@@ -80,7 +80,7 @@ class StudentT(Likelihood):
         :type link_f: Nx1 array
         :param y: data
         :type y: Nx1 array
-        :param extra_data: extra_data which is not used in student t distribution
+        :param Y_metadata: Y_metadata which is not used in student t distribution
         :returns: likelihood evaluated for this point
         :rtype: float
 
@@ -98,7 +98,7 @@ class StudentT(Likelihood):
                     )
         return np.sum(objective)
 
-    def dlogpdf_dlink(self, link_f, y, extra_data=None):
+    def dlogpdf_dlink(self, link_f, y, Y_metadata=None):
         """
         Gradient of the log likelihood function at y, given link(f) w.r.t link(f)
 
@@ -109,7 +109,7 @@ class StudentT(Likelihood):
         :type link_f: Nx1 array
         :param y: data
         :type y: Nx1 array
-        :param extra_data: extra_data which is not used in student t distribution
+        :param Y_metadata: Y_metadata which is not used in student t distribution
         :returns: gradient of likelihood evaluated at points
         :rtype: Nx1 array
 
@@ -119,7 +119,7 @@ class StudentT(Likelihood):
         grad = ((self.v + 1) * e) / (self.v * self.sigma2 + (e**2))
         return grad
 
-    def d2logpdf_dlink2(self, link_f, y, extra_data=None):
+    def d2logpdf_dlink2(self, link_f, y, Y_metadata=None):
         """
         Hessian at y, given link(f), w.r.t link(f)
         i.e. second derivative logpdf at y given link(f_i) and link(f_j)  w.r.t link(f_i) and link(f_j)
@@ -132,7 +132,7 @@ class StudentT(Likelihood):
         :type link_f: Nx1 array
         :param y: data
         :type y: Nx1 array
-        :param extra_data: extra_data which is not used in student t distribution
+        :param Y_metadata: Y_metadata which is not used in student t distribution
         :returns: Diagonal of hessian matrix (second derivative of likelihood evaluated at points f)
         :rtype: Nx1 array
 
@@ -145,7 +145,7 @@ class StudentT(Likelihood):
         hess = ((self.v + 1)*(e**2 - self.v*self.sigma2)) / ((self.sigma2*self.v + e**2)**2)
         return hess
 
-    def d3logpdf_dlink3(self, link_f, y, extra_data=None):
+    def d3logpdf_dlink3(self, link_f, y, Y_metadata=None):
         """
         Third order derivative log-likelihood function at y given link(f) w.r.t link(f)
 
@@ -156,7 +156,7 @@ class StudentT(Likelihood):
         :type link_f: Nx1 array
         :param y: data
         :type y: Nx1 array
-        :param extra_data: extra_data which is not used in student t distribution
+        :param Y_metadata: Y_metadata which is not used in student t distribution
         :returns: third derivative of likelihood evaluated at points f
         :rtype: Nx1 array
         """
@@ -167,7 +167,7 @@ class StudentT(Likelihood):
                     )
         return d3lik_dlink3
 
-    def dlogpdf_link_dvar(self, link_f, y, extra_data=None):
+    def dlogpdf_link_dvar(self, link_f, y, Y_metadata=None):
         """
         Gradient of the log-likelihood function at y given f, w.r.t variance parameter (t_noise)
 
@@ -178,7 +178,7 @@ class StudentT(Likelihood):
         :type link_f: Nx1 array
         :param y: data
         :type y: Nx1 array
-        :param extra_data: extra_data which is not used in student t distribution
+        :param Y_metadata: Y_metadata which is not used in student t distribution
         :returns: derivative of likelihood evaluated at points f w.r.t variance parameter
         :rtype: float
         """
@@ -187,7 +187,7 @@ class StudentT(Likelihood):
         dlogpdf_dvar = self.v*(e**2 - self.sigma2)/(2*self.sigma2*(self.sigma2*self.v + e**2))
         return np.sum(dlogpdf_dvar)
 
-    def dlogpdf_dlink_dvar(self, link_f, y, extra_data=None):
+    def dlogpdf_dlink_dvar(self, link_f, y, Y_metadata=None):
         """
         Derivative of the dlogpdf_dlink w.r.t variance parameter (t_noise)
 
@@ -198,7 +198,7 @@ class StudentT(Likelihood):
         :type link_f: Nx1 array
         :param y: data
         :type y: Nx1 array
-        :param extra_data: extra_data which is not used in student t distribution
+        :param Y_metadata: Y_metadata which is not used in student t distribution
         :returns: derivative of likelihood evaluated at points f w.r.t variance parameter
         :rtype: Nx1 array
         """
@@ -207,7 +207,7 @@ class StudentT(Likelihood):
         dlogpdf_dlink_dvar = (self.v*(self.v+1)*(-e))/((self.sigma2*self.v + e**2)**2)
         return dlogpdf_dlink_dvar
 
-    def d2logpdf_dlink2_dvar(self, link_f, y, extra_data=None):
+    def d2logpdf_dlink2_dvar(self, link_f, y, Y_metadata=None):
         """
         Gradient of the hessian (d2logpdf_dlink2) w.r.t variance parameter (t_noise)
 
@@ -218,7 +218,7 @@ class StudentT(Likelihood):
         :type link_f: Nx1 array
         :param y: data
         :type y: Nx1 array
-        :param extra_data: extra_data which is not used in student t distribution
+        :param Y_metadata: Y_metadata which is not used in student t distribution
         :returns: derivative of hessian evaluated at points f and f_j w.r.t variance parameter
         :rtype: Nx1 array
         """
@@ -229,18 +229,18 @@ class StudentT(Likelihood):
                            )
         return d2logpdf_dlink2_dvar
 
-    def dlogpdf_link_dtheta(self, f, y, extra_data=None):
-        dlogpdf_dvar = self.dlogpdf_link_dvar(f, y, extra_data=extra_data)
+    def dlogpdf_link_dtheta(self, f, y, Y_metadata=None):
+        dlogpdf_dvar = self.dlogpdf_link_dvar(f, y, Y_metadata=Y_metadata)
         dlogpdf_dv = np.zeros_like(dlogpdf_dvar) #FIXME: Not done yet
         return np.hstack((dlogpdf_dvar, dlogpdf_dv))
 
-    def dlogpdf_dlink_dtheta(self, f, y, extra_data=None):
-        dlogpdf_dlink_dvar = self.dlogpdf_dlink_dvar(f, y, extra_data=extra_data)
+    def dlogpdf_dlink_dtheta(self, f, y, Y_metadata=None):
+        dlogpdf_dlink_dvar = self.dlogpdf_dlink_dvar(f, y, Y_metadata=Y_metadata)
         dlogpdf_dlink_dv = np.zeros_like(dlogpdf_dlink_dvar) #FIXME: Not done yet
         return np.hstack((dlogpdf_dlink_dvar, dlogpdf_dlink_dv))
 
-    def d2logpdf_dlink2_dtheta(self, f, y, extra_data=None):
-        d2logpdf_dlink2_dvar = self.d2logpdf_dlink2_dvar(f, y, extra_data=extra_data)
+    def d2logpdf_dlink2_dtheta(self, f, y, Y_metadata=None):
+        d2logpdf_dlink2_dvar = self.d2logpdf_dlink2_dvar(f, y, Y_metadata=Y_metadata)
         d2logpdf_dlink2_dv = np.zeros_like(d2logpdf_dlink2_dvar) #FIXME: Not done yet
         return np.hstack((d2logpdf_dlink2_dvar, d2logpdf_dlink2_dv))
 
@@ -268,7 +268,6 @@ class StudentT(Likelihood):
         """
         Compute mean of the prediction
         """
-        #FIXME: Not correct
         return mu
 
     def samples(self, gp):
diff --git a/GPy/testing/likelihood_tests.py b/GPy/testing/likelihood_tests.py
index d4105e3c..50adcbff 100644
--- a/GPy/testing/likelihood_tests.py
+++ b/GPy/testing/likelihood_tests.py
@@ -255,25 +255,25 @@ class TestNoiseModels(object):
                             "Y": self.binary_Y,
                             "ep": False # FIXME: Should be True when we have it working again
                             },
-                        #"Exponential_default": {
-                            #"model": GPy.likelihoods.exponential(),
-                            #"link_f_constraints": [constrain_positive],
-                            #"Y": self.positive_Y,
-                            #"laplace": True,
-                        #},
-                        #"Poisson_default": {
-                            #"model": GPy.likelihoods.poisson(),
-                            #"link_f_constraints": [constrain_positive],
-                            #"Y": self.integer_Y,
-                            #"laplace": True,
-                            #"ep": False #Should work though...
-                        #},
-                        #"Gamma_default": {
-                            #"model": GPy.likelihoods.gamma(),
-                            #"link_f_constraints": [constrain_positive],
-                            #"Y": self.positive_Y,
-                            #"laplace": True
-                        #}
+                        "Exponential_default": {
+                            "model": GPy.likelihoods.Exponential(),
+                            "link_f_constraints": [constrain_positive],
+                            "Y": self.positive_Y,
+                            "laplace": True,
+                        },
+                        "Poisson_default": {
+                            "model": GPy.likelihoods.Poisson(),
+                            "link_f_constraints": [constrain_positive],
+                            "Y": self.integer_Y,
+                            "laplace": True,
+                            "ep": False #Should work though...
+                        },
+                        "Gamma_default": {
+                            "model": GPy.likelihoods.Gamma(),
+                            "link_f_constraints": [constrain_positive],
+                            "Y": self.positive_Y,
+                            "laplace": True
+                        }
                     }
 
         for name, attributes in noise_models.iteritems():