Merge branch 'devel' of github.com:SheffieldML/GPy into devel

2026-06-02 14:45:15 +02:00 · 2013-06-04 17:19:44 +01:00 · 2013-06-04 17:19:44 +01:00 · e29e5624f5
commit e29e5624f5
parent edae1032c7 3546650d15
7 changed files with 263 additions and 163 deletions
--- a/GPy/core/model.py
+++ b/GPy/core/model.py
@ -14,6 +14,7 @@ import priors
 import re
 import sys
 import pdb
 from GPy.core.domains import POSITIVE, REAL
 # import numdifftools as ndt
 class model(parameterised):
@ -68,8 +69,9 @@ class model(parameterised):
        # check constraints are okay
-        if isinstance(what, (priors.gamma, priors.inverse_gamma, priors.log_Gaussian)):
+
-            constrained_positive_indices = [i for i, t in zip(self.constrained_indices, self.constraints) if t.domain == 'positive']
+        if what.domain is POSITIVE:
            constrained_positive_indices = [i for i, t in zip(self.constrained_indices, self.constraints) if t.domain == POSITIVE]
            if len(constrained_positive_indices):
                constrained_positive_indices = np.hstack(constrained_positive_indices)
            else:
@ -82,7 +84,7 @@ class model(parameterised):
                print '\n'.join([n for i, n in enumerate(self._get_param_names()) if i in unconst])
                print '\n'
                self.constrain_positive(unconst)
-        elif isinstance(what, priors.Gaussian):
+        elif what.domain is REAL:
            assert not np.any(which[:, None] == self.all_constrained_indices()), "constraint and prior incompatible"
        else:
            raise ValueError, "prior not recognised"
--- a/GPy/core/priors.py
+++ b/GPy/core/priors.py
@ -6,8 +6,11 @@ import numpy as np
 import pylab as pb
 from scipy.special import gammaln, digamma
 from ..util.linalg import pdinv
 from GPy.core.domains import REAL, POSITIVE
 import warnings
 class prior:
    domain = None
    def pdf(self, x):
        return np.exp(self.lnpdf(x))
@ -29,7 +32,7 @@ class Gaussian(prior):
    .. Note:: Bishop 2006 notation is used throughout the code
    """
-
+    domain = REAL
    def __init__(self, mu, sigma):
        self.mu = float(mu)
        self.sigma = float(sigma)
@ -59,7 +62,7 @@ class log_Gaussian(prior):
    .. Note:: Bishop 2006 notation is used throughout the code
    """
-
+    domain = POSITIVE
    def __init__(self, mu, sigma):
        self.mu = float(mu)
        self.sigma = float(sigma)
@ -89,7 +92,7 @@ class multivariate_Gaussian:
    .. Note:: Bishop 2006 notation is used throughout the code
    """
-
+    domain = REAL
    def __init__(self, mu, var):
        self.mu = np.array(mu).flatten()
        self.var = np.array(var)
@ -138,7 +141,7 @@ def gamma_from_EV(E,V):
    :param V: variance
    """
-
+    warnings.warn("use Gamma.from_EV to create Gamma Prior", FutureWarning)
    a = np.square(E) / V
    b = E / V
    return gamma(a, b)
@ -153,6 +156,7 @@ class gamma(prior):
    .. Note:: Bishop 2006 notation is used throughout the code
    """
    domain = POSITIVE
    def __init__(self, a, b):
        self.a = float(a)
        self.b = float(b)
@ -191,6 +195,7 @@ class inverse_gamma(prior):
    .. Note:: Bishop 2006 notation is used throughout the code
    """
    domain = POSITIVE
    def __init__(self, a, b):
        self.a = float(a)
        self.b = float(b)
--- a/GPy/core/transformations.py
+++ b/GPy/core/transformations.py
@ -3,11 +3,10 @@
 import numpy as np
 from GPy.core.domains import POSITIVE, NEGATIVE, BOUNDED
 class transformation(object):
-    def __init__(self):
+    domain = None
        # set the domain. Suggest we use 'positive', 'bounded', etc
        self.domain = 'undefined'
    def f(self, x):
        raise NotImplementedError
@ -24,8 +23,7 @@ class transformation(object):
        raise NotImplementedError
 class logexp(transformation):
-    def __init__(self):
+    domain = POSITIVE
        self.domain = 'positive'
    def f(self, x):
        return np.log(1. + np.exp(x))
    def finv(self, f):
@ -43,8 +41,8 @@ class logexp_clipped(transformation):
    min_bound = 1e-10
    log_max_bound = np.log(max_bound)
    log_min_bound = np.log(min_bound)
    domain = POSITIVE
    def __init__(self, lower=1e-6):
        self.domain = 'positive'
        self.lower = lower
    def f(self, x):
        exp = np.exp(np.clip(x, self.log_min_bound, self.log_max_bound))
@ -66,8 +64,7 @@ class logexp_clipped(transformation):
        return '(+ve_c)'
 class exponent(transformation):
-    def __init__(self):
+    domain = POSITIVE
        self.domain = 'positive'
    def f(self, x):
        return np.exp(x)
    def finv(self, x):
@ -82,8 +79,7 @@ class exponent(transformation):
        return '(+ve)'
 class negative_exponent(transformation):
-    def __init__(self):
+    domain = NEGATIVE
        self.domain = 'negative'
    def f(self, x):
        return -np.exp(x)
    def finv(self, x):
@ -98,8 +94,7 @@ class negative_exponent(transformation):
        return '(-ve)'
 class square(transformation):
-    def __init__(self):
+    domain = POSITIVE
        self.domain = 'positive'
    def f(self, x):
        return x ** 2
    def finv(self, x):
@ -112,8 +107,8 @@ class square(transformation):
        return '(+sq)'
 class logistic(transformation):
    domain = BOUNDED
    def __init__(self, lower, upper):
        self.domain = 'bounded'
        assert lower < upper
        self.lower, self.upper = float(lower), float(upper)
        self.difference = self.upper - self.lower
--- a/GPy/examples/classification.py
+++ b/GPy/examples/classification.py
@ -21,13 +21,15 @@ def crescent_data(seed=default_seed): # FIXME
    """
    data = GPy.util.datasets.crescent_data(seed=seed)
    Y = data['Y']
    Y[Y.flatten()==-1] = 0
    # Kernel object
    kernel = GPy.kern.rbf(data['X'].shape[1])
    # Likelihood object
-    distribution = GPy.likelihoods.likelihood_functions.probit()
+    distribution = GPy.likelihoods.likelihood_functions.binomial()
-    likelihood = GPy.likelihoods.EP(data['Y'], distribution)
+    likelihood = GPy.likelihoods.EP(Y, distribution)
    m = GPy.models.GP(data['X'], likelihood, kernel)
@ -49,12 +51,15 @@ def oil():
    Run a Gaussian process classification on the oil data. The demonstration calls the basic GP classification model and uses EP to approximate the likelihood.
    """
    data = GPy.util.datasets.oil()
    Y = data['Y'][:, 0:1]
    Y[Y.flatten()==-1] = 0
    # Kernel object
    kernel = GPy.kern.rbf(12)
    # Likelihood object
-    distribution = GPy.likelihoods.likelihood_functions.probit()
+    distribution = GPy.likelihoods.likelihood_functions.binomial()
-    likelihood = GPy.likelihoods.EP(data['Y'][:, 0:1], distribution)
+    likelihood = GPy.likelihoods.EP(Y, distribution)
    # Create GP model
    m = GPy.models.GP(data['X'], likelihood=likelihood, kernel=kernel)
@ -79,12 +84,14 @@ def toy_linear_1d_classification(seed=default_seed):
    data = GPy.util.datasets.toy_linear_1d_classification(seed=seed)
    Y = data['Y'][:, 0:1]
    Y[Y.flatten() == -1] = 0
    # Kernel object
    kernel = GPy.kern.rbf(1)
    # Likelihood object
-    distribution = GPy.likelihoods.likelihood_functions.probit()
+    link = GPy.likelihoods.link_functions.probit
    distribution = GPy.likelihoods.likelihood_functions.binomial(link)
    likelihood = GPy.likelihoods.EP(Y, distribution)
    # Model definition
@ -115,12 +122,13 @@ def sparse_toy_linear_1d_classification(seed=default_seed):
    data = GPy.util.datasets.toy_linear_1d_classification(seed=seed)
    Y = data['Y'][:, 0:1]
    Y[Y.flatten() == -1] = 0
    # Kernel object
    kernel = GPy.kern.rbf(1) + GPy.kern.white(1)
    # Likelihood object
-    distribution = GPy.likelihoods.likelihood_functions.probit()
+    distribution = GPy.likelihoods.likelihood_functions.binomial()
    likelihood = GPy.likelihoods.EP(Y, distribution)
    Z = np.random.uniform(data['X'].min(), data['X'].max(), (10, 1))
@ -156,13 +164,15 @@ def sparse_crescent_data(inducing=10, seed=default_seed):
    """
    data = GPy.util.datasets.crescent_data(seed=seed)
    Y = data['Y']
    Y[Y.flatten()==-1]=0
    # Kernel object
    kernel = GPy.kern.rbf(data['X'].shape[1]) + GPy.kern.white(data['X'].shape[1])
    # Likelihood object
-    distribution = GPy.likelihoods.likelihood_functions.probit()
+    distribution = GPy.likelihoods.likelihood_functions.binomial()
-    likelihood = GPy.likelihoods.EP(data['Y'], distribution)
+    likelihood = GPy.likelihoods.EP(Y, distribution)
    sample = np.random.randint(0, data['X'].shape[0], inducing)
    Z = data['X'][sample, :]
--- a/GPy/likelihoods/EP.py
+++ b/GPy/likelihoods/EP.py
@ -20,6 +20,7 @@ class EP(likelihood):
        self.N, self.D = self.data.shape
        self.is_heteroscedastic = True
        self.Nparams = 0
        self._transf_data = self.likelihood_function._preprocess_values(data)
        #Initial values - Likelihood approximation parameters:
        #p(y|f) = t(f|tau_tilde,v_tilde)
--- a/GPy/likelihoods/likelihood_functions.py
+++ b/GPy/likelihoods/likelihood_functions.py
@ -8,19 +8,68 @@ import scipy as sp
 import pylab as pb
 from ..util.plot import gpplot
 from ..util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
 import link_functions
-class likelihood_function:
+class likelihood_function(object):
    """
    Likelihood class for doing Expectation propagation
    :param Y: observed output (Nx1 numpy.darray)
    ..Note:: Y values allowed depend on the likelihood_function used
    """
-    def __init__(self,location=0,scale=1):
+    def __init__(self,link):
-        self.location = location
+        if link == self._analytical:
-        self.scale = scale
+            self.moments_match = self._moments_match_analytical
        else:
            assert isinstance(link,link_functions.link_function)
            self.link = link
            self.moments_match = self._moments_match_numerical
-class probit(likelihood_function):
+    def _preprocess_values(self,Y):
        return Y
    def _product(self,gp,obs,mu,sigma):
        return stats.norm.pdf(gp,loc=mu,scale=sigma) * self._distribution(gp,obs)
    def _nlog_product(self,gp,obs,mu,sigma):
        return -(-.5*(gp-mu)**2/sigma**2 + self._log_distribution(gp,obs))
    def _locate(self,obs,mu,sigma):
        """
        Golden Search to find the mode in the _product function (cavity x exact likelihood) and define a grid around it for numerical integration
        """
        golden_A = -1 if obs == 0 else np.array([np.log(obs),mu]).min() #Lower limit
        golden_B = np.array([np.log(obs),mu]).max() #Upper limit
        return sp.optimize.golden(self._nlog_product, args=(obs,mu,sigma), brack=(golden_A,golden_B)) #Better to work with _nlog_product than with _product
    def _moments_match_numerical(self,obs,tau,v):
        """
        Simpson's Rule is used to calculate the moments mumerically, it needs a grid of points as input.
        """
        mu = v/tau
        sigma = np.sqrt(1./tau)
        opt = self._locate(obs,mu,sigma)
        width = 3./np.log(max(obs,2))
        A = opt - width #Grid's lower limit
        B = opt + width #Grid's Upper limit
        K =  10*int(np.log(max(obs,150))) #Number of points in the grid
        h = (B-A)/K # length of the intervals
        grid_x = np.hstack([np.linspace(opt-width,opt,K/2+1)[1:-1], np.linspace(opt,opt+width,K/2+1)]) # grid of points (X axis)
        x = np.hstack([A,B,grid_x[range(1,K,2)],grid_x[range(2,K-1,2)]]) # grid_x rearranged, just to make Simpson's algorithm easier
        _aux1 = self._product(A,obs,mu,sigma)
        _aux2 = self._product(B,obs,mu,sigma)
        _aux3 = 4*self._product(grid_x[range(1,K,2)],obs,mu,sigma)
        _aux4 = 2*self._product(grid_x[range(2,K-1,2)],obs,mu,sigma)
        zeroth = np.hstack((_aux1,_aux2,_aux3,_aux4)) # grid of points (Y axis) rearranged
        first = zeroth*x
        second = first*x
        Z_hat = sum(zeroth)*h/3 # Zero-th moment
        mu_hat = sum(first)*h/(3*Z_hat) # First moment
        m2 = sum(second)*h/(3*Z_hat) # Second moment
        sigma2_hat = m2 - mu_hat**2 # Second central moment
        return float(Z_hat), float(mu_hat), float(sigma2_hat)
 class binomial(likelihood_function):
    """
    Probit likelihood
    Y is expected to take values in {-1,1}
@ -29,8 +78,33 @@ class probit(likelihood_function):
    L(x) = \\Phi (Y_i*f_i)
    $$
    """
    def __init__(self,link=None):
        self._analytical = link_functions.probit
        if not link:
            link = self._analytical
        super(binomial, self).__init__(link)
-    def moments_match(self,data_i,tau_i,v_i):
+    def _distribution(self,gp,obs):
        pass
    def _log_distribution(self,gp,obs):
        pass
    def _preprocess_values(self,Y):
        """
        Check if the values of the observations correspond to the values
        assumed by the likelihood function.
        ..Note:: Binary classification algorithm works better with classes {-1,1}
        """
        Y_prep = Y.copy()
        Y1 = Y[Y.flatten()==1].size
        Y2 = Y[Y.flatten()==0].size
        assert Y1 + Y2 == Y.size, 'Binomial likelihood is meant to be used only with outputs in {0,1}.'
        Y_prep[Y.flatten() == 0] = -1
        return Y_prep
    def _moments_match_analytical(self,data_i,tau_i,v_i):
        """
        Moments match of the marginal approximation in EP algorithm
@ -38,8 +112,6 @@ class probit(likelihood_function):
        :param tau_i: precision of the cavity distribution (float)
        :param v_i: mean/variance of the cavity distribution (float)
        """
        #if data_i == 0: data_i = -1 #NOTE Binary classification algorithm works better with classes {-1,1}, 1D-plotting works better with classes {0,1}.
        # TODO: some version of assert
        z = data_i*v_i/np.sqrt(tau_i**2 + tau_i)
        Z_hat = std_norm_cdf(z)
        phi = std_norm_pdf(z)
@ -50,6 +122,8 @@ class probit(likelihood_function):
    def predictive_values(self,mu,var):
        """
        Compute  mean, variance and conficence interval (percentiles 5 and 95) of the  prediction
        :param mu: mean of the latent variable
        :param var: variance of the latent variable
        """
        mu = mu.flatten()
        var = var.flatten()
@ -69,68 +143,23 @@ class Poisson(likelihood_function):
    L(x) = \exp(\lambda) * \lambda**Y_i / Y_i!
    $$
    """
-    def moments_match(self,data_i,tau_i,v_i):
+    def __init__(self,link=None):
-        """
+        self._analytical = None
-        Moments match of the marginal approximation in EP algorithm
+        if not link:
            link = link_functions.log()
        super(Poisson, self).__init__(link)
-        :param i: number of observation (int)
+    def _distribution(self,gp,obs):
-        :param tau_i: precision of the cavity distribution (float)
+        return stats.poisson.pmf(obs,self.link.inv_transf(gp))
        :param v_i: mean/variance of the cavity distribution (float)
        """
        mu = v_i/tau_i
        sigma = np.sqrt(1./tau_i)
        def poisson_norm(f):
            """
            Product of the likelihood and the cavity distribution
            """
            pdf_norm_f = stats.norm.pdf(f,loc=mu,scale=sigma)
            rate = np.exp( (f*self.scale)+self.location)
            poisson = stats.poisson.pmf(float(data_i),rate)
            return pdf_norm_f*poisson
-        def log_pnm(f):
+    def _log_distribution(self,gp,obs):
-            """
+        return - self.link.inv_transf(gp) + obs * self.link.log_inv_transf(gp)
            Log of poisson_norm
            """
            return -(-.5*(f-mu)**2/sigma**2 - np.exp( (f*self.scale)+self.location) + ( (f*self.scale)+self.location)*data_i)
        """
        Golden Search and Simpson's Rule
        --------------------------------
        Simpson's Rule is used to calculate the moments mumerically, it needs a grid of points as input.
        Golden Search is used to find the mode in the poisson_norm distribution and define around it the grid for Simpson's Rule
        """
        #TODO golden search & simpson's rule can be defined in the general likelihood class, rather than in each specific case.
        #Golden search
        golden_A = -1 if data_i == 0 else np.array([np.log(data_i),mu]).min() #Lower limit
        golden_B = np.array([np.log(data_i),mu]).max() #Upper limit
        golden_A = (golden_A - self.location)/self.scale
        golden_B = (golden_B - self.location)/self.scale
        opt = sp.optimize.golden(log_pnm,brack=(golden_A,golden_B)) #Better to work with log_pnm than with poisson_norm
        # Simpson's approximation
        width = 3./np.log(max(data_i,2))
        A = opt - width #Lower limit
        B = opt + width #Upper limit
        K =  10*int(np.log(max(data_i,150))) #Number of points in the grid, we DON'T want K to be the same number for every case
        h = (B-A)/K # length of the intervals
        grid_x = np.hstack([np.linspace(opt-width,opt,K/2+1)[1:-1], np.linspace(opt,opt+width,K/2+1)]) # grid of points (X axis)
        x = np.hstack([A,B,grid_x[range(1,K,2)],grid_x[range(2,K-1,2)]]) # grid_x rearranged, just to make Simpson's algorithm easier
        zeroth = np.hstack([poisson_norm(A),poisson_norm(B),[4*poisson_norm(f) for f in grid_x[range(1,K,2)]],[2*poisson_norm(f) for f in grid_x[range(2,K-1,2)]]]) # grid of points (Y axis) rearranged like x
        first = zeroth*x
        second = first*x
        Z_hat = sum(zeroth)*h/3 # Zero-th moment
        mu_hat = sum(first)*h/(3*Z_hat) # First moment
        m2 = sum(second)*h/(3*Z_hat) # Second moment
        sigma2_hat = m2 - mu_hat**2 # Second central moment
        return float(Z_hat), float(mu_hat), float(sigma2_hat)
    def predictive_values(self,mu,var):
        """
        Compute  mean, and conficence interval (percentiles 5 and 95) of the  prediction
        """
-        mean = np.exp(mu*self.scale + self.location)
+        mean = self.link.transf(mu)#np.exp(mu*self.scale + self.location)
        tmp = stats.poisson.ppf(np.array([.025,.975]),mean)
        p_025 = tmp[:,0]
        p_975 = tmp[:,1]
--- a/GPy/likelihoods/link_functions.py
+++ b/GPy/likelihoods/link_functions.py
@ -0,0 +1,58 @@
 # Copyright (c) 2012, 2013 Ricardo Andrade
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
 import numpy as np
 from scipy import stats
 import scipy as sp
 import pylab as pb
 from ..util.plot import gpplot
 from ..util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
 class link_function(object):
    """
    Link function class for doing non-Gaussian likelihoods approximation
    :param Y: observed output (Nx1 numpy.darray)
    ..Note:: Y values allowed depend on the likelihood_function used
    """
    def __init__(self):
        pass
 class identity(link_function):
    def transf(self,mu):
        return mu
    def inv_transf(self,f):
        return f
    def log_inv_transf(self,f):
        return np.log(f)
 class log(link_function):
    def transf(self,mu):
        return np.log(mu)
    def inv_transf(self,f):
        return np.exp(f)
    def log_inv_transf(self,f):
        return f
 class log_ex_1(link_function):
    def transf(self,mu):
        return np.log(np.exp(mu) - 1)
    def inv_transf(self,f):
        return np.log(np.exp(f)+1)
    def log_inv_tranf(self,f):
        return np.log(np.log(np.exp(f)+1))
 class probit(link_function):
    pass