Merge branch 'devel' of github.com:SheffieldML/GPy into devel

This commit is contained in:
James Hensman 2013-06-04 17:19:44 +01:00
commit e29e5624f5
7 changed files with 263 additions and 163 deletions

View file

@ -14,6 +14,7 @@ import priors
import re import re
import sys import sys
import pdb import pdb
from GPy.core.domains import POSITIVE, REAL
# import numdifftools as ndt # import numdifftools as ndt
class model(parameterised): class model(parameterised):
@ -68,8 +69,9 @@ class model(parameterised):
# check constraints are okay # check constraints are okay
if isinstance(what, (priors.gamma, priors.inverse_gamma, priors.log_Gaussian)):
constrained_positive_indices = [i for i, t in zip(self.constrained_indices, self.constraints) if t.domain == 'positive'] if what.domain is POSITIVE:
constrained_positive_indices = [i for i, t in zip(self.constrained_indices, self.constraints) if t.domain == POSITIVE]
if len(constrained_positive_indices): if len(constrained_positive_indices):
constrained_positive_indices = np.hstack(constrained_positive_indices) constrained_positive_indices = np.hstack(constrained_positive_indices)
else: else:
@ -82,7 +84,7 @@ class model(parameterised):
print '\n'.join([n for i, n in enumerate(self._get_param_names()) if i in unconst]) print '\n'.join([n for i, n in enumerate(self._get_param_names()) if i in unconst])
print '\n' print '\n'
self.constrain_positive(unconst) self.constrain_positive(unconst)
elif isinstance(what, priors.Gaussian): elif what.domain is REAL:
assert not np.any(which[:, None] == self.all_constrained_indices()), "constraint and prior incompatible" assert not np.any(which[:, None] == self.all_constrained_indices()), "constraint and prior incompatible"
else: else:
raise ValueError, "prior not recognised" raise ValueError, "prior not recognised"

View file

@ -6,8 +6,11 @@ import numpy as np
import pylab as pb import pylab as pb
from scipy.special import gammaln, digamma from scipy.special import gammaln, digamma
from ..util.linalg import pdinv from ..util.linalg import pdinv
from GPy.core.domains import REAL, POSITIVE
import warnings
class prior: class prior:
domain = None
def pdf(self, x): def pdf(self, x):
return np.exp(self.lnpdf(x)) return np.exp(self.lnpdf(x))
@ -29,7 +32,7 @@ class Gaussian(prior):
.. Note:: Bishop 2006 notation is used throughout the code .. Note:: Bishop 2006 notation is used throughout the code
""" """
domain = REAL
def __init__(self, mu, sigma): def __init__(self, mu, sigma):
self.mu = float(mu) self.mu = float(mu)
self.sigma = float(sigma) self.sigma = float(sigma)
@ -59,7 +62,7 @@ class log_Gaussian(prior):
.. Note:: Bishop 2006 notation is used throughout the code .. Note:: Bishop 2006 notation is used throughout the code
""" """
domain = POSITIVE
def __init__(self, mu, sigma): def __init__(self, mu, sigma):
self.mu = float(mu) self.mu = float(mu)
self.sigma = float(sigma) self.sigma = float(sigma)
@ -89,7 +92,7 @@ class multivariate_Gaussian:
.. Note:: Bishop 2006 notation is used throughout the code .. Note:: Bishop 2006 notation is used throughout the code
""" """
domain = REAL
def __init__(self, mu, var): def __init__(self, mu, var):
self.mu = np.array(mu).flatten() self.mu = np.array(mu).flatten()
self.var = np.array(var) self.var = np.array(var)
@ -138,7 +141,7 @@ def gamma_from_EV(E,V):
:param V: variance :param V: variance
""" """
warnings.warn("use Gamma.from_EV to create Gamma Prior", FutureWarning)
a = np.square(E) / V a = np.square(E) / V
b = E / V b = E / V
return gamma(a, b) return gamma(a, b)
@ -153,6 +156,7 @@ class gamma(prior):
.. Note:: Bishop 2006 notation is used throughout the code .. Note:: Bishop 2006 notation is used throughout the code
""" """
domain = POSITIVE
def __init__(self, a, b): def __init__(self, a, b):
self.a = float(a) self.a = float(a)
self.b = float(b) self.b = float(b)
@ -191,6 +195,7 @@ class inverse_gamma(prior):
.. Note:: Bishop 2006 notation is used throughout the code .. Note:: Bishop 2006 notation is used throughout the code
""" """
domain = POSITIVE
def __init__(self, a, b): def __init__(self, a, b):
self.a = float(a) self.a = float(a)
self.b = float(b) self.b = float(b)

View file

@ -3,11 +3,10 @@
import numpy as np import numpy as np
from GPy.core.domains import POSITIVE, NEGATIVE, BOUNDED
class transformation(object): class transformation(object):
def __init__(self): domain = None
# set the domain. Suggest we use 'positive', 'bounded', etc
self.domain = 'undefined'
def f(self, x): def f(self, x):
raise NotImplementedError raise NotImplementedError
@ -24,8 +23,7 @@ class transformation(object):
raise NotImplementedError raise NotImplementedError
class logexp(transformation): class logexp(transformation):
def __init__(self): domain = POSITIVE
self.domain = 'positive'
def f(self, x): def f(self, x):
return np.log(1. + np.exp(x)) return np.log(1. + np.exp(x))
def finv(self, f): def finv(self, f):
@ -43,8 +41,8 @@ class logexp_clipped(transformation):
min_bound = 1e-10 min_bound = 1e-10
log_max_bound = np.log(max_bound) log_max_bound = np.log(max_bound)
log_min_bound = np.log(min_bound) log_min_bound = np.log(min_bound)
domain = POSITIVE
def __init__(self, lower=1e-6): def __init__(self, lower=1e-6):
self.domain = 'positive'
self.lower = lower self.lower = lower
def f(self, x): def f(self, x):
exp = np.exp(np.clip(x, self.log_min_bound, self.log_max_bound)) exp = np.exp(np.clip(x, self.log_min_bound, self.log_max_bound))
@ -66,8 +64,7 @@ class logexp_clipped(transformation):
return '(+ve_c)' return '(+ve_c)'
class exponent(transformation): class exponent(transformation):
def __init__(self): domain = POSITIVE
self.domain = 'positive'
def f(self, x): def f(self, x):
return np.exp(x) return np.exp(x)
def finv(self, x): def finv(self, x):
@ -82,8 +79,7 @@ class exponent(transformation):
return '(+ve)' return '(+ve)'
class negative_exponent(transformation): class negative_exponent(transformation):
def __init__(self): domain = NEGATIVE
self.domain = 'negative'
def f(self, x): def f(self, x):
return -np.exp(x) return -np.exp(x)
def finv(self, x): def finv(self, x):
@ -98,8 +94,7 @@ class negative_exponent(transformation):
return '(-ve)' return '(-ve)'
class square(transformation): class square(transformation):
def __init__(self): domain = POSITIVE
self.domain = 'positive'
def f(self, x): def f(self, x):
return x ** 2 return x ** 2
def finv(self, x): def finv(self, x):
@ -112,8 +107,8 @@ class square(transformation):
return '(+sq)' return '(+sq)'
class logistic(transformation): class logistic(transformation):
domain = BOUNDED
def __init__(self, lower, upper): def __init__(self, lower, upper):
self.domain = 'bounded'
assert lower < upper assert lower < upper
self.lower, self.upper = float(lower), float(upper) self.lower, self.upper = float(lower), float(upper)
self.difference = self.upper - self.lower self.difference = self.upper - self.lower

View file

@ -21,13 +21,15 @@ def crescent_data(seed=default_seed): # FIXME
""" """
data = GPy.util.datasets.crescent_data(seed=seed) data = GPy.util.datasets.crescent_data(seed=seed)
Y = data['Y']
Y[Y.flatten()==-1] = 0
# Kernel object # Kernel object
kernel = GPy.kern.rbf(data['X'].shape[1]) kernel = GPy.kern.rbf(data['X'].shape[1])
# Likelihood object # Likelihood object
distribution = GPy.likelihoods.likelihood_functions.probit() distribution = GPy.likelihoods.likelihood_functions.binomial()
likelihood = GPy.likelihoods.EP(data['Y'], distribution) likelihood = GPy.likelihoods.EP(Y, distribution)
m = GPy.models.GP(data['X'], likelihood, kernel) m = GPy.models.GP(data['X'], likelihood, kernel)
@ -49,12 +51,15 @@ def oil():
Run a Gaussian process classification on the oil data. The demonstration calls the basic GP classification model and uses EP to approximate the likelihood. Run a Gaussian process classification on the oil data. The demonstration calls the basic GP classification model and uses EP to approximate the likelihood.
""" """
data = GPy.util.datasets.oil() data = GPy.util.datasets.oil()
Y = data['Y'][:, 0:1]
Y[Y.flatten()==-1] = 0
# Kernel object # Kernel object
kernel = GPy.kern.rbf(12) kernel = GPy.kern.rbf(12)
# Likelihood object # Likelihood object
distribution = GPy.likelihoods.likelihood_functions.probit() distribution = GPy.likelihoods.likelihood_functions.binomial()
likelihood = GPy.likelihoods.EP(data['Y'][:, 0:1], distribution) likelihood = GPy.likelihoods.EP(Y, distribution)
# Create GP model # Create GP model
m = GPy.models.GP(data['X'], likelihood=likelihood, kernel=kernel) m = GPy.models.GP(data['X'], likelihood=likelihood, kernel=kernel)
@ -79,12 +84,14 @@ def toy_linear_1d_classification(seed=default_seed):
data = GPy.util.datasets.toy_linear_1d_classification(seed=seed) data = GPy.util.datasets.toy_linear_1d_classification(seed=seed)
Y = data['Y'][:, 0:1] Y = data['Y'][:, 0:1]
Y[Y.flatten() == -1] = 0
# Kernel object # Kernel object
kernel = GPy.kern.rbf(1) kernel = GPy.kern.rbf(1)
# Likelihood object # Likelihood object
distribution = GPy.likelihoods.likelihood_functions.probit() link = GPy.likelihoods.link_functions.probit
distribution = GPy.likelihoods.likelihood_functions.binomial(link)
likelihood = GPy.likelihoods.EP(Y, distribution) likelihood = GPy.likelihoods.EP(Y, distribution)
# Model definition # Model definition
@ -115,12 +122,13 @@ def sparse_toy_linear_1d_classification(seed=default_seed):
data = GPy.util.datasets.toy_linear_1d_classification(seed=seed) data = GPy.util.datasets.toy_linear_1d_classification(seed=seed)
Y = data['Y'][:, 0:1] Y = data['Y'][:, 0:1]
Y[Y.flatten() == -1] = 0
# Kernel object # Kernel object
kernel = GPy.kern.rbf(1) + GPy.kern.white(1) kernel = GPy.kern.rbf(1) + GPy.kern.white(1)
# Likelihood object # Likelihood object
distribution = GPy.likelihoods.likelihood_functions.probit() distribution = GPy.likelihoods.likelihood_functions.binomial()
likelihood = GPy.likelihoods.EP(Y, distribution) likelihood = GPy.likelihoods.EP(Y, distribution)
Z = np.random.uniform(data['X'].min(), data['X'].max(), (10, 1)) Z = np.random.uniform(data['X'].min(), data['X'].max(), (10, 1))
@ -156,13 +164,15 @@ def sparse_crescent_data(inducing=10, seed=default_seed):
""" """
data = GPy.util.datasets.crescent_data(seed=seed) data = GPy.util.datasets.crescent_data(seed=seed)
Y = data['Y']
Y[Y.flatten()==-1]=0
# Kernel object # Kernel object
kernel = GPy.kern.rbf(data['X'].shape[1]) + GPy.kern.white(data['X'].shape[1]) kernel = GPy.kern.rbf(data['X'].shape[1]) + GPy.kern.white(data['X'].shape[1])
# Likelihood object # Likelihood object
distribution = GPy.likelihoods.likelihood_functions.probit() distribution = GPy.likelihoods.likelihood_functions.binomial()
likelihood = GPy.likelihoods.EP(data['Y'], distribution) likelihood = GPy.likelihoods.EP(Y, distribution)
sample = np.random.randint(0, data['X'].shape[0], inducing) sample = np.random.randint(0, data['X'].shape[0], inducing)
Z = data['X'][sample, :] Z = data['X'][sample, :]

View file

@ -20,6 +20,7 @@ class EP(likelihood):
self.N, self.D = self.data.shape self.N, self.D = self.data.shape
self.is_heteroscedastic = True self.is_heteroscedastic = True
self.Nparams = 0 self.Nparams = 0
self._transf_data = self.likelihood_function._preprocess_values(data)
#Initial values - Likelihood approximation parameters: #Initial values - Likelihood approximation parameters:
#p(y|f) = t(f|tau_tilde,v_tilde) #p(y|f) = t(f|tau_tilde,v_tilde)

View file

@ -8,19 +8,68 @@ import scipy as sp
import pylab as pb import pylab as pb
from ..util.plot import gpplot from ..util.plot import gpplot
from ..util.univariate_Gaussian import std_norm_pdf,std_norm_cdf from ..util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
import link_functions
class likelihood_function: class likelihood_function(object):
""" """
Likelihood class for doing Expectation propagation Likelihood class for doing Expectation propagation
:param Y: observed output (Nx1 numpy.darray) :param Y: observed output (Nx1 numpy.darray)
..Note:: Y values allowed depend on the likelihood_function used ..Note:: Y values allowed depend on the likelihood_function used
""" """
def __init__(self,location=0,scale=1): def __init__(self,link):
self.location = location if link == self._analytical:
self.scale = scale self.moments_match = self._moments_match_analytical
else:
assert isinstance(link,link_functions.link_function)
self.link = link
self.moments_match = self._moments_match_numerical
class probit(likelihood_function): def _preprocess_values(self,Y):
return Y
def _product(self,gp,obs,mu,sigma):
return stats.norm.pdf(gp,loc=mu,scale=sigma) * self._distribution(gp,obs)
def _nlog_product(self,gp,obs,mu,sigma):
return -(-.5*(gp-mu)**2/sigma**2 + self._log_distribution(gp,obs))
def _locate(self,obs,mu,sigma):
"""
Golden Search to find the mode in the _product function (cavity x exact likelihood) and define a grid around it for numerical integration
"""
golden_A = -1 if obs == 0 else np.array([np.log(obs),mu]).min() #Lower limit
golden_B = np.array([np.log(obs),mu]).max() #Upper limit
return sp.optimize.golden(self._nlog_product, args=(obs,mu,sigma), brack=(golden_A,golden_B)) #Better to work with _nlog_product than with _product
def _moments_match_numerical(self,obs,tau,v):
"""
Simpson's Rule is used to calculate the moments mumerically, it needs a grid of points as input.
"""
mu = v/tau
sigma = np.sqrt(1./tau)
opt = self._locate(obs,mu,sigma)
width = 3./np.log(max(obs,2))
A = opt - width #Grid's lower limit
B = opt + width #Grid's Upper limit
K = 10*int(np.log(max(obs,150))) #Number of points in the grid
h = (B-A)/K # length of the intervals
grid_x = np.hstack([np.linspace(opt-width,opt,K/2+1)[1:-1], np.linspace(opt,opt+width,K/2+1)]) # grid of points (X axis)
x = np.hstack([A,B,grid_x[range(1,K,2)],grid_x[range(2,K-1,2)]]) # grid_x rearranged, just to make Simpson's algorithm easier
_aux1 = self._product(A,obs,mu,sigma)
_aux2 = self._product(B,obs,mu,sigma)
_aux3 = 4*self._product(grid_x[range(1,K,2)],obs,mu,sigma)
_aux4 = 2*self._product(grid_x[range(2,K-1,2)],obs,mu,sigma)
zeroth = np.hstack((_aux1,_aux2,_aux3,_aux4)) # grid of points (Y axis) rearranged
first = zeroth*x
second = first*x
Z_hat = sum(zeroth)*h/3 # Zero-th moment
mu_hat = sum(first)*h/(3*Z_hat) # First moment
m2 = sum(second)*h/(3*Z_hat) # Second moment
sigma2_hat = m2 - mu_hat**2 # Second central moment
return float(Z_hat), float(mu_hat), float(sigma2_hat)
class binomial(likelihood_function):
""" """
Probit likelihood Probit likelihood
Y is expected to take values in {-1,1} Y is expected to take values in {-1,1}
@ -29,8 +78,33 @@ class probit(likelihood_function):
L(x) = \\Phi (Y_i*f_i) L(x) = \\Phi (Y_i*f_i)
$$ $$
""" """
def __init__(self,link=None):
self._analytical = link_functions.probit
if not link:
link = self._analytical
super(binomial, self).__init__(link)
def moments_match(self,data_i,tau_i,v_i): def _distribution(self,gp,obs):
pass
def _log_distribution(self,gp,obs):
pass
def _preprocess_values(self,Y):
"""
Check if the values of the observations correspond to the values
assumed by the likelihood function.
..Note:: Binary classification algorithm works better with classes {-1,1}
"""
Y_prep = Y.copy()
Y1 = Y[Y.flatten()==1].size
Y2 = Y[Y.flatten()==0].size
assert Y1 + Y2 == Y.size, 'Binomial likelihood is meant to be used only with outputs in {0,1}.'
Y_prep[Y.flatten() == 0] = -1
return Y_prep
def _moments_match_analytical(self,data_i,tau_i,v_i):
""" """
Moments match of the marginal approximation in EP algorithm Moments match of the marginal approximation in EP algorithm
@ -38,8 +112,6 @@ class probit(likelihood_function):
:param tau_i: precision of the cavity distribution (float) :param tau_i: precision of the cavity distribution (float)
:param v_i: mean/variance of the cavity distribution (float) :param v_i: mean/variance of the cavity distribution (float)
""" """
#if data_i == 0: data_i = -1 #NOTE Binary classification algorithm works better with classes {-1,1}, 1D-plotting works better with classes {0,1}.
# TODO: some version of assert
z = data_i*v_i/np.sqrt(tau_i**2 + tau_i) z = data_i*v_i/np.sqrt(tau_i**2 + tau_i)
Z_hat = std_norm_cdf(z) Z_hat = std_norm_cdf(z)
phi = std_norm_pdf(z) phi = std_norm_pdf(z)
@ -50,6 +122,8 @@ class probit(likelihood_function):
def predictive_values(self,mu,var): def predictive_values(self,mu,var):
""" """
Compute mean, variance and conficence interval (percentiles 5 and 95) of the prediction Compute mean, variance and conficence interval (percentiles 5 and 95) of the prediction
:param mu: mean of the latent variable
:param var: variance of the latent variable
""" """
mu = mu.flatten() mu = mu.flatten()
var = var.flatten() var = var.flatten()
@ -69,68 +143,23 @@ class Poisson(likelihood_function):
L(x) = \exp(\lambda) * \lambda**Y_i / Y_i! L(x) = \exp(\lambda) * \lambda**Y_i / Y_i!
$$ $$
""" """
def moments_match(self,data_i,tau_i,v_i): def __init__(self,link=None):
""" self._analytical = None
Moments match of the marginal approximation in EP algorithm if not link:
link = link_functions.log()
super(Poisson, self).__init__(link)
:param i: number of observation (int) def _distribution(self,gp,obs):
:param tau_i: precision of the cavity distribution (float) return stats.poisson.pmf(obs,self.link.inv_transf(gp))
:param v_i: mean/variance of the cavity distribution (float)
"""
mu = v_i/tau_i
sigma = np.sqrt(1./tau_i)
def poisson_norm(f):
"""
Product of the likelihood and the cavity distribution
"""
pdf_norm_f = stats.norm.pdf(f,loc=mu,scale=sigma)
rate = np.exp( (f*self.scale)+self.location)
poisson = stats.poisson.pmf(float(data_i),rate)
return pdf_norm_f*poisson
def log_pnm(f): def _log_distribution(self,gp,obs):
""" return - self.link.inv_transf(gp) + obs * self.link.log_inv_transf(gp)
Log of poisson_norm
"""
return -(-.5*(f-mu)**2/sigma**2 - np.exp( (f*self.scale)+self.location) + ( (f*self.scale)+self.location)*data_i)
"""
Golden Search and Simpson's Rule
--------------------------------
Simpson's Rule is used to calculate the moments mumerically, it needs a grid of points as input.
Golden Search is used to find the mode in the poisson_norm distribution and define around it the grid for Simpson's Rule
"""
#TODO golden search & simpson's rule can be defined in the general likelihood class, rather than in each specific case.
#Golden search
golden_A = -1 if data_i == 0 else np.array([np.log(data_i),mu]).min() #Lower limit
golden_B = np.array([np.log(data_i),mu]).max() #Upper limit
golden_A = (golden_A - self.location)/self.scale
golden_B = (golden_B - self.location)/self.scale
opt = sp.optimize.golden(log_pnm,brack=(golden_A,golden_B)) #Better to work with log_pnm than with poisson_norm
# Simpson's approximation
width = 3./np.log(max(data_i,2))
A = opt - width #Lower limit
B = opt + width #Upper limit
K = 10*int(np.log(max(data_i,150))) #Number of points in the grid, we DON'T want K to be the same number for every case
h = (B-A)/K # length of the intervals
grid_x = np.hstack([np.linspace(opt-width,opt,K/2+1)[1:-1], np.linspace(opt,opt+width,K/2+1)]) # grid of points (X axis)
x = np.hstack([A,B,grid_x[range(1,K,2)],grid_x[range(2,K-1,2)]]) # grid_x rearranged, just to make Simpson's algorithm easier
zeroth = np.hstack([poisson_norm(A),poisson_norm(B),[4*poisson_norm(f) for f in grid_x[range(1,K,2)]],[2*poisson_norm(f) for f in grid_x[range(2,K-1,2)]]]) # grid of points (Y axis) rearranged like x
first = zeroth*x
second = first*x
Z_hat = sum(zeroth)*h/3 # Zero-th moment
mu_hat = sum(first)*h/(3*Z_hat) # First moment
m2 = sum(second)*h/(3*Z_hat) # Second moment
sigma2_hat = m2 - mu_hat**2 # Second central moment
return float(Z_hat), float(mu_hat), float(sigma2_hat)
def predictive_values(self,mu,var): def predictive_values(self,mu,var):
""" """
Compute mean, and conficence interval (percentiles 5 and 95) of the prediction Compute mean, and conficence interval (percentiles 5 and 95) of the prediction
""" """
mean = np.exp(mu*self.scale + self.location) mean = self.link.transf(mu)#np.exp(mu*self.scale + self.location)
tmp = stats.poisson.ppf(np.array([.025,.975]),mean) tmp = stats.poisson.ppf(np.array([.025,.975]),mean)
p_025 = tmp[:,0] p_025 = tmp[:,0]
p_975 = tmp[:,1] p_975 = tmp[:,1]

View file

@ -0,0 +1,58 @@
# Copyright (c) 2012, 2013 Ricardo Andrade
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from scipy import stats
import scipy as sp
import pylab as pb
from ..util.plot import gpplot
from ..util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
class link_function(object):
"""
Link function class for doing non-Gaussian likelihoods approximation
:param Y: observed output (Nx1 numpy.darray)
..Note:: Y values allowed depend on the likelihood_function used
"""
def __init__(self):
pass
class identity(link_function):
def transf(self,mu):
return mu
def inv_transf(self,f):
return f
def log_inv_transf(self,f):
return np.log(f)
class log(link_function):
def transf(self,mu):
return np.log(mu)
def inv_transf(self,f):
return np.exp(f)
def log_inv_transf(self,f):
return f
class log_ex_1(link_function):
def transf(self,mu):
return np.log(np.exp(mu) - 1)
def inv_transf(self,f):
return np.log(np.exp(f)+1)
def log_inv_tranf(self,f):
return np.log(np.log(np.exp(f)+1))
class probit(link_function):
pass