Merge branch 'devel' of github.com:SheffieldML/GPy into devel

2026-05-15 06:52:39 +02:00 · 2013-06-04 17:19:44 +01:00 · 2013-06-04 17:19:44 +01:00 · e29e5624f5
commit e29e5624f5
parent edae1032c7 3546650d15
7 changed files with 263 additions and 163 deletions
--- a/GPy/core/model.py
+++ b/GPy/core/model.py
@ -14,6 +14,7 @@ import priors
 import re
 import sys
 import pdb
+from GPy.core.domains import POSITIVE, REAL
 # import numdifftools as ndt

 class model(parameterised):
@ -68,8 +69,9 @@ class model(parameterised):


        # check constraints are okay
-        if isinstance(what, (priors.gamma, priors.inverse_gamma, priors.log_Gaussian)):
-            constrained_positive_indices = [i for i, t in zip(self.constrained_indices, self.constraints) if t.domain == 'positive']
+
+        if what.domain is POSITIVE:
+            constrained_positive_indices = [i for i, t in zip(self.constrained_indices, self.constraints) if t.domain == POSITIVE]
            if len(constrained_positive_indices):
                constrained_positive_indices = np.hstack(constrained_positive_indices)
            else:
@ -82,7 +84,7 @@ class model(parameterised):
                print '\n'.join([n for i, n in enumerate(self._get_param_names()) if i in unconst])
                print '\n'
                self.constrain_positive(unconst)
-        elif isinstance(what, priors.Gaussian):
+        elif what.domain is REAL:
            assert not np.any(which[:, None] == self.all_constrained_indices()), "constraint and prior incompatible"
        else:
            raise ValueError, "prior not recognised"
--- a/GPy/core/priors.py
+++ b/GPy/core/priors.py
@ -6,17 +6,20 @@ import numpy as np
 import pylab as pb
 from scipy.special import gammaln, digamma
 from ..util.linalg import pdinv
+from GPy.core.domains import REAL, POSITIVE
+import warnings

 class prior:
-    def pdf(self,x):
+    domain = None
+    def pdf(self, x):
        return np.exp(self.lnpdf(x))

    def plot(self):
        rvs = self.rvs(1000)
-        pb.hist(rvs,100,normed=True)
-        xmin,xmax = pb.xlim()
-        xx = np.linspace(xmin,xmax,1000)
-        pb.plot(xx,self.pdf(xx),'r',linewidth=2)
+        pb.hist(rvs, 100, normed=True)
+        xmin, xmax = pb.xlim()
+        xx = np.linspace(xmin, xmax, 1000)
+        pb.plot(xx, self.pdf(xx), 'r', linewidth=2)


 class Gaussian(prior):
@ -29,24 +32,24 @@ class Gaussian(prior):
    .. Note:: Bishop 2006 notation is used throughout the code

    """
-
-    def __init__(self,mu,sigma):
+    domain = REAL
+    def __init__(self, mu, sigma):
        self.mu = float(mu)
        self.sigma = float(sigma)
        self.sigma2 = np.square(self.sigma)
-        self.constant = -0.5*np.log(2*np.pi*self.sigma2)
+        self.constant = -0.5 * np.log(2 * np.pi * self.sigma2)

    def __str__(self):
-        return "N("+str(np.round(self.mu))+', '+str(np.round(self.sigma2))+')'
+        return "N(" + str(np.round(self.mu)) + ', ' + str(np.round(self.sigma2)) + ')'

-    def lnpdf(self,x):
-        return self.constant - 0.5*np.square(x-self.mu)/self.sigma2
+    def lnpdf(self, x):
+        return self.constant - 0.5 * np.square(x - self.mu) / self.sigma2

-    def lnpdf_grad(self,x):
-        return -(x-self.mu)/self.sigma2
+    def lnpdf_grad(self, x):
+        return -(x - self.mu) / self.sigma2

-    def rvs(self,n):
-        return np.random.randn(n)*self.sigma + self.mu
+    def rvs(self, n):
+        return np.random.randn(n) * self.sigma + self.mu


 class log_Gaussian(prior):
@ -59,24 +62,24 @@ class log_Gaussian(prior):
    .. Note:: Bishop 2006 notation is used throughout the code

    """
-
-    def __init__(self,mu,sigma):
+    domain = POSITIVE
+    def __init__(self, mu, sigma):
        self.mu = float(mu)
        self.sigma = float(sigma)
        self.sigma2 = np.square(self.sigma)
-        self.constant = -0.5*np.log(2*np.pi*self.sigma2)
+        self.constant = -0.5 * np.log(2 * np.pi * self.sigma2)

    def __str__(self):
-        return "lnN("+str(np.round(self.mu))+', '+str(np.round(self.sigma2))+')'
+        return "lnN(" + str(np.round(self.mu)) + ', ' + str(np.round(self.sigma2)) + ')'

-    def lnpdf(self,x):
-        return self.constant - 0.5*np.square(np.log(x)-self.mu)/self.sigma2 -np.log(x)
+    def lnpdf(self, x):
+        return self.constant - 0.5 * np.square(np.log(x) - self.mu) / self.sigma2 - np.log(x)

-    def lnpdf_grad(self,x):
-        return -((np.log(x)-self.mu)/self.sigma2+1.)/x
+    def lnpdf_grad(self, x):
+        return -((np.log(x) - self.mu) / self.sigma2 + 1.) / x

-    def rvs(self,n):
-        return np.exp(np.random.randn(n)*self.sigma + self.mu)
+    def rvs(self, n):
+        return np.exp(np.random.randn(n) * self.sigma + self.mu)


 class multivariate_Gaussian:
@ -89,47 +92,47 @@ class multivariate_Gaussian:
    .. Note:: Bishop 2006 notation is used throughout the code

    """
-
-    def __init__(self,mu,var):
+    domain = REAL
+    def __init__(self, mu, var):
        self.mu = np.array(mu).flatten()
        self.var = np.array(var)
-        assert len(self.var.shape)==2
-        assert self.var.shape[0]==self.var.shape[1]
-        assert self.var.shape[0]==self.mu.size
+        assert len(self.var.shape) == 2
+        assert self.var.shape[0] == self.var.shape[1]
+        assert self.var.shape[0] == self.mu.size
        self.D = self.mu.size
        self.inv, self.hld = pdinv(self.var)
-        self.constant = -0.5*self.D*np.log(2*np.pi) - self.hld
+        self.constant = -0.5 * self.D * np.log(2 * np.pi) - self.hld

    def summary(self):
        raise NotImplementedError

-    def pdf(self,x):
+    def pdf(self, x):
        return np.exp(self.lnpdf(x))

-    def lnpdf(self,x):
-        d = x-self.mu
-        return self.constant - 0.5*np.sum(d*np.dot(d,self.inv),1)
+    def lnpdf(self, x):
+        d = x - self.mu
+        return self.constant - 0.5 * np.sum(d * np.dot(d, self.inv), 1)

-    def lnpdf_grad(self,x):
-        d = x-self.mu
-        return -np.dot(self.inv,d)
+    def lnpdf_grad(self, x):
+        d = x - self.mu
+        return -np.dot(self.inv, d)

-    def rvs(self,n):
-        return np.random.multivariate_normal(self.mu, self.var,n)
+    def rvs(self, n):
+        return np.random.multivariate_normal(self.mu, self.var, n)

    def plot(self):
-        if self.D==2:
+        if self.D == 2:
            rvs = self.rvs(200)
-            pb.plot(rvs[:,0],rvs[:,1], 'kx', mew=1.5)
-            xmin,xmax = pb.xlim()
-            ymin,ymax = pb.ylim()
+            pb.plot(rvs[:, 0], rvs[:, 1], 'kx', mew=1.5)
+            xmin, xmax = pb.xlim()
+            ymin, ymax = pb.ylim()
            xx, yy = np.mgrid[xmin:xmax:100j, ymin:ymax:100j]
-            xflat = np.vstack((xx.flatten(),yy.flatten())).T
-            zz = self.pdf(xflat).reshape(100,100)
-            pb.contour(xx,yy,zz,linewidths=2)
+            xflat = np.vstack((xx.flatten(), yy.flatten())).T
+            zz = self.pdf(xflat).reshape(100, 100)
+            pb.contour(xx, yy, zz, linewidths=2)


-def gamma_from_EV(E,V):
+def gamma_from_EV(E, V):
    """
    Creates an instance of a gamma prior  by specifying the Expected value(s)
    and Variance(s) of the distribution.
@ -138,10 +141,10 @@ def gamma_from_EV(E,V):
    :param V: variance

    """
-
-    a = np.square(E)/V
-    b = E/V
-    return gamma(a,b)
+    warnings.warn("use Gamma.from_EV to create Gamma Prior", FutureWarning)
+    a = np.square(E) / V
+    b = E / V
+    return gamma(a, b)

 class gamma(prior):
    """
@ -153,33 +156,34 @@ class gamma(prior):
    .. Note:: Bishop 2006 notation is used throughout the code

    """
-    def __init__(self,a,b):
+    domain = POSITIVE
+    def __init__(self, a, b):
        self.a = float(a)
        self.b = float(b)
-        self.constant = -gammaln(self.a) + a*np.log(b)
+        self.constant = -gammaln(self.a) + a * np.log(b)

    def __str__(self):
-        return "Ga("+str(np.round(self.a))+', '+str(np.round(self.b))+')'
+        return "Ga(" + str(np.round(self.a)) + ', ' + str(np.round(self.b)) + ')'

    def summary(self):
-        ret = {"E[x]": self.a/self.b,\
-            "E[ln x]": digamma(self.a) - np.log(self.b),\
-            "var[x]": self.a/self.b/self.b,\
-            "Entropy": gammaln(self.a) - (self.a-1.)*digamma(self.a) - np.log(self.b) + self.a}
-        if self.a >1:
-            ret['Mode'] = (self.a-1.)/self.b
+        ret = {"E[x]": self.a / self.b, \
+            "E[ln x]": digamma(self.a) - np.log(self.b), \
+            "var[x]": self.a / self.b / self.b, \
+            "Entropy": gammaln(self.a) - (self.a - 1.) * digamma(self.a) - np.log(self.b) + self.a}
+        if self.a > 1:
+            ret['Mode'] = (self.a - 1.) / self.b
        else:
            ret['mode'] = np.nan
        return ret

-    def lnpdf(self,x):
-        return self.constant + (self.a-1)*np.log(x) - self.b*x
+    def lnpdf(self, x):
+        return self.constant + (self.a - 1) * np.log(x) - self.b * x

-    def lnpdf_grad(self,x):
-        return (self.a-1.)/x - self.b
+    def lnpdf_grad(self, x):
+        return (self.a - 1.) / x - self.b

-    def rvs(self,n):
-        return np.random.gamma(scale=1./self.b,shape=self.a,size=n)
+    def rvs(self, n):
+        return np.random.gamma(scale=1. / self.b, shape=self.a, size=n)

 class inverse_gamma(prior):
    """
@ -191,19 +195,20 @@ class inverse_gamma(prior):
    .. Note:: Bishop 2006 notation is used throughout the code

    """
-    def __init__(self,a,b):
+    domain = POSITIVE
+    def __init__(self, a, b):
        self.a = float(a)
        self.b = float(b)
-        self.constant = -gammaln(self.a) + a*np.log(b)
+        self.constant = -gammaln(self.a) + a * np.log(b)

    def __str__(self):
-        return "iGa("+str(np.round(self.a))+', '+str(np.round(self.b))+')'
+        return "iGa(" + str(np.round(self.a)) + ', ' + str(np.round(self.b)) + ')'

-    def lnpdf(self,x):
-        return self.constant - (self.a+1)*np.log(x) - self.b/x
+    def lnpdf(self, x):
+        return self.constant - (self.a + 1) * np.log(x) - self.b / x

-    def lnpdf_grad(self,x):
-        return -(self.a+1.)/x + self.b/x**2
+    def lnpdf_grad(self, x):
+        return -(self.a + 1.) / x + self.b / x ** 2

-    def rvs(self,n):
-        return 1./np.random.gamma(scale=1./self.b,shape=self.a,size=n)
+    def rvs(self, n):
+        return 1. / np.random.gamma(scale=1. / self.b, shape=self.a, size=n)
--- a/GPy/core/transformations.py
+++ b/GPy/core/transformations.py
@ -3,11 +3,10 @@


 import numpy as np
+from GPy.core.domains import POSITIVE, NEGATIVE, BOUNDED

 class transformation(object):
-    def __init__(self):
-        # set the domain. Suggest we use 'positive', 'bounded', etc
-        self.domain = 'undefined'
+    domain = None
    def f(self, x):
        raise NotImplementedError

@ -24,8 +23,7 @@ class transformation(object):
        raise NotImplementedError

 class logexp(transformation):
-    def __init__(self):
-        self.domain = 'positive'
+    domain = POSITIVE
    def f(self, x):
        return np.log(1. + np.exp(x))
    def finv(self, f):
@ -43,8 +41,8 @@ class logexp_clipped(transformation):
    min_bound = 1e-10
    log_max_bound = np.log(max_bound)
    log_min_bound = np.log(min_bound)
+    domain = POSITIVE
    def __init__(self, lower=1e-6):
-        self.domain = 'positive'
        self.lower = lower
    def f(self, x):
        exp = np.exp(np.clip(x, self.log_min_bound, self.log_max_bound))
@ -66,8 +64,7 @@ class logexp_clipped(transformation):
        return '(+ve_c)'

 class exponent(transformation):
-    def __init__(self):
-        self.domain = 'positive'
+    domain = POSITIVE
    def f(self, x):
        return np.exp(x)
    def finv(self, x):
@ -82,8 +79,7 @@ class exponent(transformation):
        return '(+ve)'

 class negative_exponent(transformation):
-    def __init__(self):
-        self.domain = 'negative'
+    domain = NEGATIVE
    def f(self, x):
        return -np.exp(x)
    def finv(self, x):
@ -98,8 +94,7 @@ class negative_exponent(transformation):
        return '(-ve)'

 class square(transformation):
-    def __init__(self):
-        self.domain = 'positive'
+    domain = POSITIVE
    def f(self, x):
        return x ** 2
    def finv(self, x):
@ -112,8 +107,8 @@ class square(transformation):
        return '(+sq)'

 class logistic(transformation):
+    domain = BOUNDED
    def __init__(self, lower, upper):
-        self.domain = 'bounded'
        assert lower < upper
        self.lower, self.upper = float(lower), float(upper)
        self.difference = self.upper - self.lower
--- a/GPy/examples/classification.py
+++ b/GPy/examples/classification.py
@ -21,13 +21,15 @@ def crescent_data(seed=default_seed): # FIXME
    """

    data = GPy.util.datasets.crescent_data(seed=seed)
+    Y = data['Y']
+    Y[Y.flatten()==-1] = 0

    # Kernel object
    kernel = GPy.kern.rbf(data['X'].shape[1])

    # Likelihood object
-    distribution = GPy.likelihoods.likelihood_functions.probit()
-    likelihood = GPy.likelihoods.EP(data['Y'], distribution)
+    distribution = GPy.likelihoods.likelihood_functions.binomial()
+    likelihood = GPy.likelihoods.EP(Y, distribution)


    m = GPy.models.GP(data['X'], likelihood, kernel)
@ -49,12 +51,15 @@ def oil():
    Run a Gaussian process classification on the oil data. The demonstration calls the basic GP classification model and uses EP to approximate the likelihood.
    """
    data = GPy.util.datasets.oil()
+    Y = data['Y'][:, 0:1]
+    Y[Y.flatten()==-1] = 0
+
    # Kernel object
    kernel = GPy.kern.rbf(12)

    # Likelihood object
-    distribution = GPy.likelihoods.likelihood_functions.probit()
-    likelihood = GPy.likelihoods.EP(data['Y'][:, 0:1], distribution)
+    distribution = GPy.likelihoods.likelihood_functions.binomial()
+    likelihood = GPy.likelihoods.EP(Y, distribution)

    # Create GP model
    m = GPy.models.GP(data['X'], likelihood=likelihood, kernel=kernel)
@ -79,12 +84,14 @@ def toy_linear_1d_classification(seed=default_seed):

    data = GPy.util.datasets.toy_linear_1d_classification(seed=seed)
    Y = data['Y'][:, 0:1]
+    Y[Y.flatten() == -1] = 0

    # Kernel object
    kernel = GPy.kern.rbf(1)

    # Likelihood object
-    distribution = GPy.likelihoods.likelihood_functions.probit()
+    link = GPy.likelihoods.link_functions.probit
+    distribution = GPy.likelihoods.likelihood_functions.binomial(link)
    likelihood = GPy.likelihoods.EP(Y, distribution)

    # Model definition
@ -115,12 +122,13 @@ def sparse_toy_linear_1d_classification(seed=default_seed):

    data = GPy.util.datasets.toy_linear_1d_classification(seed=seed)
    Y = data['Y'][:, 0:1]
+    Y[Y.flatten() == -1] = 0

    # Kernel object
    kernel = GPy.kern.rbf(1) + GPy.kern.white(1)

    # Likelihood object
-    distribution = GPy.likelihoods.likelihood_functions.probit()
+    distribution = GPy.likelihoods.likelihood_functions.binomial()
    likelihood = GPy.likelihoods.EP(Y, distribution)

    Z = np.random.uniform(data['X'].min(), data['X'].max(), (10, 1))
@ -156,13 +164,15 @@ def sparse_crescent_data(inducing=10, seed=default_seed):
    """

    data = GPy.util.datasets.crescent_data(seed=seed)
+    Y = data['Y']
+    Y[Y.flatten()==-1]=0

    # Kernel object
    kernel = GPy.kern.rbf(data['X'].shape[1]) + GPy.kern.white(data['X'].shape[1])

    # Likelihood object
-    distribution = GPy.likelihoods.likelihood_functions.probit()
-    likelihood = GPy.likelihoods.EP(data['Y'], distribution)
+    distribution = GPy.likelihoods.likelihood_functions.binomial()
+    likelihood = GPy.likelihoods.EP(Y, distribution)

    sample = np.random.randint(0, data['X'].shape[0], inducing)
    Z = data['X'][sample, :]
--- a/GPy/likelihoods/EP.py
+++ b/GPy/likelihoods/EP.py
@ -20,6 +20,7 @@ class EP(likelihood):
        self.N, self.D = self.data.shape
        self.is_heteroscedastic = True
        self.Nparams = 0
+        self._transf_data = self.likelihood_function._preprocess_values(data)

        #Initial values - Likelihood approximation parameters:
        #p(y|f) = t(f|tau_tilde,v_tilde)
--- a/GPy/likelihoods/likelihood_functions.py
+++ b/GPy/likelihoods/likelihood_functions.py
@ -8,19 +8,68 @@ import scipy as sp
 import pylab as pb
 from ..util.plot import gpplot
 from ..util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
+import link_functions

-class likelihood_function:
+class likelihood_function(object):
    """
    Likelihood class for doing Expectation propagation

    :param Y: observed output (Nx1 numpy.darray)
    ..Note:: Y values allowed depend on the likelihood_function used
    """
-    def __init__(self,location=0,scale=1):
-        self.location = location
-        self.scale = scale
+    def __init__(self,link):
+        if link == self._analytical:
+            self.moments_match = self._moments_match_analytical
+        else:
+            assert isinstance(link,link_functions.link_function)
+            self.link = link
+            self.moments_match = self._moments_match_numerical

-class probit(likelihood_function):
+    def _preprocess_values(self,Y):
+        return Y
+
+    def _product(self,gp,obs,mu,sigma):
+        return stats.norm.pdf(gp,loc=mu,scale=sigma) * self._distribution(gp,obs)
+
+    def _nlog_product(self,gp,obs,mu,sigma):
+        return -(-.5*(gp-mu)**2/sigma**2 + self._log_distribution(gp,obs))
+
+    def _locate(self,obs,mu,sigma):
+        """
+        Golden Search to find the mode in the _product function (cavity x exact likelihood) and define a grid around it for numerical integration
+        """
+        golden_A = -1 if obs == 0 else np.array([np.log(obs),mu]).min() #Lower limit
+        golden_B = np.array([np.log(obs),mu]).max() #Upper limit
+        return sp.optimize.golden(self._nlog_product, args=(obs,mu,sigma), brack=(golden_A,golden_B)) #Better to work with _nlog_product than with _product
+
+    def _moments_match_numerical(self,obs,tau,v):
+        """
+        Simpson's Rule is used to calculate the moments numerically, it needs a grid of points as input.
+        """
+        mu = v/tau
+        sigma = np.sqrt(1./tau)
+        opt = self._locate(obs,mu,sigma)
+        width = 3./np.log(max(obs,2))
+        A = opt - width #Grid's lower limit
+        B = opt + width #Grid's Upper limit
+        K =  10*int(np.log(max(obs,150))) #Number of points in the grid
+        h = (B-A)/K # length of the intervals
+        grid_x = np.hstack([np.linspace(opt-width,opt,K/2+1)[1:-1], np.linspace(opt,opt+width,K/2+1)]) # grid of points (X axis)
+        x = np.hstack([A,B,grid_x[range(1,K,2)],grid_x[range(2,K-1,2)]]) # grid_x rearranged, just to make Simpson's algorithm easier
+        _aux1 = self._product(A,obs,mu,sigma)
+        _aux2 = self._product(B,obs,mu,sigma)
+        _aux3 = 4*self._product(grid_x[range(1,K,2)],obs,mu,sigma)
+        _aux4 = 2*self._product(grid_x[range(2,K-1,2)],obs,mu,sigma)
+        zeroth = np.hstack((_aux1,_aux2,_aux3,_aux4)) # grid of points (Y axis) rearranged
+        first = zeroth*x
+        second = first*x
+        Z_hat = sum(zeroth)*h/3 # Zero-th moment
+        mu_hat = sum(first)*h/(3*Z_hat) # First moment
+        m2 = sum(second)*h/(3*Z_hat) # Second moment
+        sigma2_hat = m2 - mu_hat**2 # Second central moment
+        return float(Z_hat), float(mu_hat), float(sigma2_hat)
+
+class binomial(likelihood_function):
    """
    Probit likelihood
    Y is expected to take values in {-1,1}
@ -29,8 +78,33 @@ class probit(likelihood_function):
    L(x) = \\Phi (Y_i*f_i)
    $$
    """
+    def __init__(self,link=None):
+        self._analytical = link_functions.probit
+        if not link:
+            link = self._analytical
+        super(binomial, self).__init__(link)

-    def moments_match(self,data_i,tau_i,v_i):
+    def _distribution(self,gp,obs):
+        raise NotImplementedError("binomial._distribution is not implemented; only the analytical probit path works")  # `pass` returned None, which made _product fail with a TypeError
+
+    def _log_distribution(self,gp,obs):
+        raise NotImplementedError("binomial._log_distribution is not implemented; only the analytical probit path works")  # `pass` returned None, which made _nlog_product fail with a TypeError
+
+    def _preprocess_values(self,Y):
+        """
+        Check if the values of the observations correspond to the values
+        assumed by the likelihood function.
+
+        ..Note:: Binary classification algorithm works better with classes {-1,1}
+        """
+        Y_prep = Y.copy()
+        Y1 = Y[Y.flatten()==1].size
+        Y2 = Y[Y.flatten()==0].size
+        assert Y1 + Y2 == Y.size, 'Binomial likelihood is meant to be used only with outputs in {0,1}.'
+        Y_prep[Y.flatten() == 0] = -1
+        return Y_prep
+
+    def _moments_match_analytical(self,data_i,tau_i,v_i):
        """
        Moments match of the marginal approximation in EP algorithm

@ -38,8 +112,6 @@ class probit(likelihood_function):
        :param tau_i: precision of the cavity distribution (float)
        :param v_i: mean/variance of the cavity distribution (float)
        """
-        #if data_i == 0: data_i = -1 #NOTE Binary classification algorithm works better with classes {-1,1}, 1D-plotting works better with classes {0,1}.
-        # TODO: some version of assert
        z = data_i*v_i/np.sqrt(tau_i**2 + tau_i)
        Z_hat = std_norm_cdf(z)
        phi = std_norm_pdf(z)
@ -50,6 +122,8 @@ class probit(likelihood_function):
    def predictive_values(self,mu,var):
        """
        Compute  mean, variance and conficence interval (percentiles 5 and 95) of the  prediction
+        :param mu: mean of the latent variable
+        :param var: variance of the latent variable
        """
        mu = mu.flatten()
        var = var.flatten()
@ -69,68 +143,23 @@ class Poisson(likelihood_function):
    L(x) = \exp(\lambda) * \lambda**Y_i / Y_i!
    $$
    """
-    def moments_match(self,data_i,tau_i,v_i):
-        """
-        Moments match of the marginal approximation in EP algorithm
+    def __init__(self,link=None):
+        self._analytical = None
+        if not link:
+            link = link_functions.log()
+        super(Poisson, self).__init__(link)

-        :param i: number of observation (int)
-        :param tau_i: precision of the cavity distribution (float)
-        :param v_i: mean/variance of the cavity distribution (float)
-        """
-        mu = v_i/tau_i
-        sigma = np.sqrt(1./tau_i)
-        def poisson_norm(f):
-            """
-            Product of the likelihood and the cavity distribution
-            """
-            pdf_norm_f = stats.norm.pdf(f,loc=mu,scale=sigma)
-            rate = np.exp( (f*self.scale)+self.location)
-            poisson = stats.poisson.pmf(float(data_i),rate)
-            return pdf_norm_f*poisson
+    def _distribution(self,gp,obs):
+        return stats.poisson.pmf(obs,self.link.inv_transf(gp))

-        def log_pnm(f):
-            """
-            Log of poisson_norm
-            """
-            return -(-.5*(f-mu)**2/sigma**2 - np.exp( (f*self.scale)+self.location) + ( (f*self.scale)+self.location)*data_i)
-
-        """
-        Golden Search and Simpson's Rule
-        --------------------------------
-        Simpson's Rule is used to calculate the moments mumerically, it needs a grid of points as input.
-        Golden Search is used to find the mode in the poisson_norm distribution and define around it the grid for Simpson's Rule
-        """
-        #TODO golden search & simpson's rule can be defined in the general likelihood class, rather than in each specific case.
-
-        #Golden search
-        golden_A = -1 if data_i == 0 else np.array([np.log(data_i),mu]).min() #Lower limit
-        golden_B = np.array([np.log(data_i),mu]).max() #Upper limit
-        golden_A = (golden_A - self.location)/self.scale
-        golden_B = (golden_B - self.location)/self.scale
-        opt = sp.optimize.golden(log_pnm,brack=(golden_A,golden_B)) #Better to work with log_pnm than with poisson_norm
-
-        # Simpson's approximation
-        width = 3./np.log(max(data_i,2))
-        A = opt - width #Lower limit
-        B = opt + width #Upper limit
-        K =  10*int(np.log(max(data_i,150))) #Number of points in the grid, we DON'T want K to be the same number for every case
-        h = (B-A)/K # length of the intervals
-        grid_x = np.hstack([np.linspace(opt-width,opt,K/2+1)[1:-1], np.linspace(opt,opt+width,K/2+1)]) # grid of points (X axis)
-        x = np.hstack([A,B,grid_x[range(1,K,2)],grid_x[range(2,K-1,2)]]) # grid_x rearranged, just to make Simpson's algorithm easier
-        zeroth = np.hstack([poisson_norm(A),poisson_norm(B),[4*poisson_norm(f) for f in grid_x[range(1,K,2)]],[2*poisson_norm(f) for f in grid_x[range(2,K-1,2)]]]) # grid of points (Y axis) rearranged like x
-        first = zeroth*x
-        second = first*x
-        Z_hat = sum(zeroth)*h/3 # Zero-th moment
-        mu_hat = sum(first)*h/(3*Z_hat) # First moment
-        m2 = sum(second)*h/(3*Z_hat) # Second moment
-        sigma2_hat = m2 - mu_hat**2 # Second central moment
-        return float(Z_hat), float(mu_hat), float(sigma2_hat)
+    def _log_distribution(self,gp,obs):
+        return - self.link.inv_transf(gp) + obs * self.link.log_inv_transf(gp)

    def predictive_values(self,mu,var):
        """
        Compute  mean, and conficence interval (percentiles 5 and 95) of the  prediction
        """
-        mean = np.exp(mu*self.scale + self.location)
+        mean = self.link.inv_transf(mu)  # latent value -> Poisson rate (np.exp for the default log link), matching _distribution; transf is the reverse map (np.log)
        tmp = stats.poisson.ppf(np.array([.025,.975]),mean)
        p_025 = tmp[:,0]
        p_975 = tmp[:,1]
--- a/GPy/likelihoods/link_functions.py
+++ b/GPy/likelihoods/link_functions.py
@ -0,0 +1,58 @@
+# Copyright (c) 2012, 2013 Ricardo Andrade
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+
+
+import numpy as np
+from scipy import stats
+import scipy as sp
+import pylab as pb
+from ..util.plot import gpplot
+from ..util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
+
+class link_function(object):
+    """
+    Base class for the link functions passed to non-Gaussian likelihoods.
+
+    The constructor takes no arguments and stores nothing; likelihood_function
+    only accepts a non-analytical link that is an instance of this class.
+    """
+    def __init__(self):
+        pass
+
+
+
+class identity(link_function):
+    def transf(self,mu):
+        return mu
+
+    def inv_transf(self,f):
+        return f
+
+    def log_inv_transf(self,f):
+        return np.log(f)
+
+class log(link_function):
+
+    def transf(self,mu):
+        return np.log(mu)
+
+    def inv_transf(self,f):
+        return np.exp(f)
+
+    def log_inv_transf(self,f):
+        return f
+
+class log_ex_1(link_function):  # link whose inverse is log(1 + exp(f))
+    def transf(self,mu):
+        return np.log(np.exp(mu) - 1)  # undoes inv_transf: log(exp(mu) - 1)
+
+    def inv_transf(self,f):
+        return np.log(np.exp(f)+1)  # log(1 + exp(f))
+
+    def log_inv_transf(self,f):  # was misspelt `log_inv_tranf`; likelihoods call link.log_inv_transf, so the old name raised AttributeError
+        return np.log(np.log(np.exp(f)+1))  # log of inv_transf(f)
+
+class probit(link_function):
+    pass
+
+