Merge branch 'devel' of github.com:SheffieldML/GPy into devel

2026-05-18 13:55:14 +02:00 · 2013-06-04 16:50:04 +01:00 · 2013-06-04 16:50:04 +01:00 · dbc4bc3f3c
commit dbc4bc3f3c
parent 3fceef9d67 d17aa5276c
8 changed files with 178 additions and 76 deletions
--- a/GPy/init.py
+++ b/GPy/init.py
@ -2,16 +2,17 @@
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
-import kern
+import core
 import models
 import inference
 import util
 import examples
 from core import priors
 import likelihoods
 import testing
 from numpy.testing import Tester
 from nose.tools import nottest
 import kern
 from core import priors
@nottest
 def tests():
--- a/GPy/core/gp_base.py
+++ b/GPy/core/gp_base.py
@ -4,7 +4,7 @@ from .. import kern
 from ..util.plot import gpplot, Tango, x_frame1D, x_frame2D
 import pylab as pb
-class GPBase(model):
+class GPBase(model.model):
    """
    Gaussian Process model for holding shared behaviour between
    sprase_GP and GP models
@ -24,6 +24,9 @@ class GPBase(model):
            self._Xmean = X.mean(0)[None, :]
            self._Xstd = X.std(0)[None, :]
            self.X = (X.copy() - self._Xmean) / self._Xstd
        else:
            self._Xmean = np.zeros((1,self.Q))
            self._Xstd = np.ones((1,self.Q))
        super(GPBase, self).__init__()
--- a/GPy/examples/classification.py
+++ b/GPy/examples/classification.py
@ -21,13 +21,15 @@ def crescent_data(seed=default_seed): # FIXME
    """
    data = GPy.util.datasets.crescent_data(seed=seed)
    Y = data['Y']
    Y[Y.flatten()==-1] = 0
    # Kernel object
    kernel = GPy.kern.rbf(data['X'].shape[1])
    # Likelihood object
-    distribution = GPy.likelihoods.likelihood_functions.probit()
+    distribution = GPy.likelihoods.likelihood_functions.binomial()
-    likelihood = GPy.likelihoods.EP(data['Y'], distribution)
+    likelihood = GPy.likelihoods.EP(Y, distribution)
    m = GPy.models.GP(data['X'], likelihood, kernel)
@ -49,12 +51,15 @@ def oil():
    Run a Gaussian process classification on the oil data. The demonstration calls the basic GP classification model and uses EP to approximate the likelihood.
    """
    data = GPy.util.datasets.oil()
    Y = data['Y'][:, 0:1]
    Y[Y.flatten()==-1] = 0
    # Kernel object
    kernel = GPy.kern.rbf(12)
    # Likelihood object
-    distribution = GPy.likelihoods.likelihood_functions.probit()
+    distribution = GPy.likelihoods.likelihood_functions.binomial()
-    likelihood = GPy.likelihoods.EP(data['Y'][:, 0:1], distribution)
+    likelihood = GPy.likelihoods.EP(Y, distribution)
    # Create GP model
    m = GPy.models.GP(data['X'], likelihood=likelihood, kernel=kernel)
@ -79,12 +84,14 @@ def toy_linear_1d_classification(seed=default_seed):
    data = GPy.util.datasets.toy_linear_1d_classification(seed=seed)
    Y = data['Y'][:, 0:1]
    Y[Y.flatten() == -1] = 0
    # Kernel object
    kernel = GPy.kern.rbf(1)
    # Likelihood object
-    distribution = GPy.likelihoods.likelihood_functions.probit()
+    link = GPy.likelihoods.link_functions.probit
    distribution = GPy.likelihoods.likelihood_functions.binomial(link)
    likelihood = GPy.likelihoods.EP(Y, distribution)
    # Model definition
@ -115,12 +122,13 @@ def sparse_toy_linear_1d_classification(seed=default_seed):
    data = GPy.util.datasets.toy_linear_1d_classification(seed=seed)
    Y = data['Y'][:, 0:1]
    Y[Y.flatten() == -1] = 0
    # Kernel object
    kernel = GPy.kern.rbf(1) + GPy.kern.white(1)
    # Likelihood object
-    distribution = GPy.likelihoods.likelihood_functions.probit()
+    distribution = GPy.likelihoods.likelihood_functions.binomial()
    likelihood = GPy.likelihoods.EP(Y, distribution)
    Z = np.random.uniform(data['X'].min(), data['X'].max(), (10, 1))
@ -156,13 +164,15 @@ def sparse_crescent_data(inducing=10, seed=default_seed):
    """
    data = GPy.util.datasets.crescent_data(seed=seed)
    Y = data['Y']
    Y[Y.flatten()==-1]=0
    # Kernel object
    kernel = GPy.kern.rbf(data['X'].shape[1]) + GPy.kern.white(data['X'].shape[1])
    # Likelihood object
-    distribution = GPy.likelihoods.likelihood_functions.probit()
+    distribution = GPy.likelihoods.likelihood_functions.binomial()
-    likelihood = GPy.likelihoods.EP(data['Y'], distribution)
+    likelihood = GPy.likelihoods.EP(Y, distribution)
    sample = np.random.randint(0, data['X'].shape[0], inducing)
    Z = data['X'][sample, :]
--- a/GPy/likelihoods/EP.py
+++ b/GPy/likelihoods/EP.py
@ -20,6 +20,7 @@ class EP(likelihood):
        self.N, self.D = self.data.shape
        self.is_heteroscedastic = True
        self.Nparams = 0
        self._transf_data = self.likelihood_function._preprocess_values(data)
        #Initial values - Likelihood approximation parameters:
        #p(y|f) = t(f|tau_tilde,v_tilde)
--- a/GPy/likelihoods/likelihood_functions.py
+++ b/GPy/likelihoods/likelihood_functions.py
@ -8,19 +8,68 @@ import scipy as sp
 import pylab as pb
 from ..util.plot import gpplot
 from ..util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
 import link_functions
-class likelihood_function:
+class likelihood_function(object):
    """
    Likelihood class for doing Expectation propagation
    :param Y: observed output (Nx1 numpy.darray)
    ..Note:: Y values allowed depend on the likelihood_function used
    """
-    def __init__(self,location=0,scale=1):
+    def __init__(self,link):
-        self.location = location
+        if link == self._analytical:
-        self.scale = scale
+            self.moments_match = self._moments_match_analytical
        else:
            assert isinstance(link,link_functions.link_function)
            self.link = link
            self.moments_match = self._moments_match_numerical
-class probit(likelihood_function):
+    def _preprocess_values(self,Y):
        return Y
    def _product(self,gp,obs,mu,sigma):
        return stats.norm.pdf(gp,loc=mu,scale=sigma) * self._distribution(gp,obs)
    def _nlog_product(self,gp,obs,mu,sigma):
        return -(-.5*(gp-mu)**2/sigma**2 + self._log_distribution(gp,obs))
    def _locate(self,obs,mu,sigma):
        """
        Golden Search to find the mode in the _product function (cavity x exact likelihood) and define a grid around it for numerical integration
        """
        golden_A = -1 if obs == 0 else np.array([np.log(obs),mu]).min() #Lower limit
        golden_B = np.array([np.log(obs),mu]).max() #Upper limit
        return sp.optimize.golden(self._nlog_product, args=(obs,mu,sigma), brack=(golden_A,golden_B)) #Better to work with _nlog_product than with _product
    def _moments_match_numerical(self,obs,tau,v):
        """
        Simpson's Rule is used to calculate the moments mumerically, it needs a grid of points as input.
        """
        mu = v/tau
        sigma = np.sqrt(1./tau)
        opt = self._locate(obs,mu,sigma)
        width = 3./np.log(max(obs,2))
        A = opt - width #Grid's lower limit
        B = opt + width #Grid's Upper limit
        K =  10*int(np.log(max(obs,150))) #Number of points in the grid
        h = (B-A)/K # length of the intervals
        grid_x = np.hstack([np.linspace(opt-width,opt,K/2+1)[1:-1], np.linspace(opt,opt+width,K/2+1)]) # grid of points (X axis)
        x = np.hstack([A,B,grid_x[range(1,K,2)],grid_x[range(2,K-1,2)]]) # grid_x rearranged, just to make Simpson's algorithm easier
        _aux1 = self._product(A,obs,mu,sigma)
        _aux2 = self._product(B,obs,mu,sigma)
        _aux3 = 4*self._product(grid_x[range(1,K,2)],obs,mu,sigma)
        _aux4 = 2*self._product(grid_x[range(2,K-1,2)],obs,mu,sigma)
        zeroth = np.hstack((_aux1,_aux2,_aux3,_aux4)) # grid of points (Y axis) rearranged
        first = zeroth*x
        second = first*x
        Z_hat = sum(zeroth)*h/3 # Zero-th moment
        mu_hat = sum(first)*h/(3*Z_hat) # First moment
        m2 = sum(second)*h/(3*Z_hat) # Second moment
        sigma2_hat = m2 - mu_hat**2 # Second central moment
        return float(Z_hat), float(mu_hat), float(sigma2_hat)
 class binomial(likelihood_function):
    """
    Probit likelihood
    Y is expected to take values in {-1,1}
@ -29,8 +78,33 @@ class probit(likelihood_function):
    L(x) = \\Phi (Y_i*f_i)
    $$
    """
    def __init__(self,link=None):
        self._analytical = link_functions.probit
        if not link:
            link = self._analytical
        super(binomial, self).__init__(link)
-    def moments_match(self,data_i,tau_i,v_i):
+    def _distribution(self,gp,obs):
        pass
    def _log_distribution(self,gp,obs):
        pass
    def _preprocess_values(self,Y):
        """
        Check if the values of the observations correspond to the values
        assumed by the likelihood function.
        ..Note:: Binary classification algorithm works better with classes {-1,1}
        """
        Y_prep = Y.copy()
        Y1 = Y[Y.flatten()==1].size
        Y2 = Y[Y.flatten()==0].size
        assert Y1 + Y2 == Y.size, 'Binomial likelihood is meant to be used only with outputs in {0,1}.'
        Y_prep[Y.flatten() == 0] = -1
        return Y_prep
    def _moments_match_analytical(self,data_i,tau_i,v_i):
        """
        Moments match of the marginal approximation in EP algorithm
@ -38,8 +112,6 @@ class probit(likelihood_function):
        :param tau_i: precision of the cavity distribution (float)
        :param v_i: mean/variance of the cavity distribution (float)
        """
        #if data_i == 0: data_i = -1 #NOTE Binary classification algorithm works better with classes {-1,1}, 1D-plotting works better with classes {0,1}.
        # TODO: some version of assert
        z = data_i*v_i/np.sqrt(tau_i**2 + tau_i)
        Z_hat = std_norm_cdf(z)
        phi = std_norm_pdf(z)
@ -50,6 +122,8 @@ class probit(likelihood_function):
    def predictive_values(self,mu,var):
        """
        Compute  mean, variance and conficence interval (percentiles 5 and 95) of the  prediction
        :param mu: mean of the latent variable
        :param var: variance of the latent variable
        """
        mu = mu.flatten()
        var = var.flatten()
@ -69,68 +143,23 @@ class Poisson(likelihood_function):
    L(x) = \exp(\lambda) * \lambda**Y_i / Y_i!
    $$
    """
-    def moments_match(self,data_i,tau_i,v_i):
+    def __init__(self,link=None):
-        """
+        self._analytical = None
-        Moments match of the marginal approximation in EP algorithm
+        if not link:
            link = link_functions.log()
        super(Poisson, self).__init__(link)
-        :param i: number of observation (int)
+    def _distribution(self,gp,obs):
-        :param tau_i: precision of the cavity distribution (float)
+        return stats.poisson.pmf(obs,self.link.inv_transf(gp))
        :param v_i: mean/variance of the cavity distribution (float)
        """
        mu = v_i/tau_i
        sigma = np.sqrt(1./tau_i)
        def poisson_norm(f):
            """
            Product of the likelihood and the cavity distribution
            """
            pdf_norm_f = stats.norm.pdf(f,loc=mu,scale=sigma)
            rate = np.exp( (f*self.scale)+self.location)
            poisson = stats.poisson.pmf(float(data_i),rate)
            return pdf_norm_f*poisson
-        def log_pnm(f):
+    def _log_distribution(self,gp,obs):
-            """
+        return - self.link.inv_transf(gp) + obs * self.link.log_inv_transf(gp)
            Log of poisson_norm
            """
            return -(-.5*(f-mu)**2/sigma**2 - np.exp( (f*self.scale)+self.location) + ( (f*self.scale)+self.location)*data_i)
        """
        Golden Search and Simpson's Rule
        --------------------------------
        Simpson's Rule is used to calculate the moments mumerically, it needs a grid of points as input.
        Golden Search is used to find the mode in the poisson_norm distribution and define around it the grid for Simpson's Rule
        """
        #TODO golden search & simpson's rule can be defined in the general likelihood class, rather than in each specific case.
        #Golden search
        golden_A = -1 if data_i == 0 else np.array([np.log(data_i),mu]).min() #Lower limit
        golden_B = np.array([np.log(data_i),mu]).max() #Upper limit
        golden_A = (golden_A - self.location)/self.scale
        golden_B = (golden_B - self.location)/self.scale
        opt = sp.optimize.golden(log_pnm,brack=(golden_A,golden_B)) #Better to work with log_pnm than with poisson_norm
        # Simpson's approximation
        width = 3./np.log(max(data_i,2))
        A = opt - width #Lower limit
        B = opt + width #Upper limit
        K =  10*int(np.log(max(data_i,150))) #Number of points in the grid, we DON'T want K to be the same number for every case
        h = (B-A)/K # length of the intervals
        grid_x = np.hstack([np.linspace(opt-width,opt,K/2+1)[1:-1], np.linspace(opt,opt+width,K/2+1)]) # grid of points (X axis)
        x = np.hstack([A,B,grid_x[range(1,K,2)],grid_x[range(2,K-1,2)]]) # grid_x rearranged, just to make Simpson's algorithm easier
        zeroth = np.hstack([poisson_norm(A),poisson_norm(B),[4*poisson_norm(f) for f in grid_x[range(1,K,2)]],[2*poisson_norm(f) for f in grid_x[range(2,K-1,2)]]]) # grid of points (Y axis) rearranged like x
        first = zeroth*x
        second = first*x
        Z_hat = sum(zeroth)*h/3 # Zero-th moment
        mu_hat = sum(first)*h/(3*Z_hat) # First moment
        m2 = sum(second)*h/(3*Z_hat) # Second moment
        sigma2_hat = m2 - mu_hat**2 # Second central moment
        return float(Z_hat), float(mu_hat), float(sigma2_hat)
    def predictive_values(self,mu,var):
        """
        Compute  mean, and conficence interval (percentiles 5 and 95) of the  prediction
        """
-        mean = np.exp(mu*self.scale + self.location)
+        mean = self.link.transf(mu)#np.exp(mu*self.scale + self.location)
        tmp = stats.poisson.ppf(np.array([.025,.975]),mean)
        p_025 = tmp[:,0]
        p_975 = tmp[:,1]
--- a/GPy/likelihoods/link_functions.py
+++ b/GPy/likelihoods/link_functions.py
@ -0,0 +1,58 @@
 # Copyright (c) 2012, 2013 Ricardo Andrade
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
 import numpy as np
 from scipy import stats
 import scipy as sp
 import pylab as pb
 from ..util.plot import gpplot
 from ..util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
 class link_function(object):
    """
    Link function class for doing non-Gaussian likelihoods approximation
    :param Y: observed output (Nx1 numpy.darray)
    ..Note:: Y values allowed depend on the likelihood_function used
    """
    def __init__(self):
        pass
 class identity(link_function):
    def transf(self,mu):
        return mu
    def inv_transf(self,f):
        return f
    def log_inv_transf(self,f):
        return np.log(f)
 class log(link_function):
    def transf(self,mu):
        return np.log(mu)
    def inv_transf(self,f):
        return np.exp(f)
    def log_inv_transf(self,f):
        return f
 class log_ex_1(link_function):
    def transf(self,mu):
        return np.log(np.exp(mu) - 1)
    def inv_transf(self,f):
        return np.log(np.exp(f)+1)
    def log_inv_tranf(self,f):
        return np.log(np.log(np.exp(f)+1))
 class probit(link_function):
    pass
--- a/GPy/models/GP_regression.py
+++ b/GPy/models/GP_regression.py
@ -31,5 +31,5 @@ class GP_regression(GP):
        likelihood = likelihoods.Gaussian(Y,normalize=normalize_Y)
-        super(GP_regression, self).__init__(self, X, likelihood, kernel, normalize_X=normalize_X)
+        super(GP_regression, self).__init__(X, likelihood, kernel, normalize_X=normalize_X)
        self._set_params(self._get_params())
--- a/GPy/models/sparse_GPLVM.py
+++ b/GPy/models/sparse_GPLVM.py
@ -8,7 +8,7 @@ import sys, pdb
 # from .. import kern
 # from ..core import model
 # from ..util.linalg import pdinv, PCA
-from ..core import GPLVM
+from GPLVM import GPLVM
 from sparse_GP_regression import sparse_GP_regression
 class sparse_GPLVM(sparse_GP_regression, GPLVM):