Merge branch 'devel' of github.com:SheffieldML/GPy into devel

This commit is contained in:
Max Zwiessele 2013-06-04 16:50:04 +01:00
commit dbc4bc3f3c
8 changed files with 178 additions and 76 deletions

View file

@ -2,16 +2,17 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
import kern import core
import models import models
import inference import inference
import util import util
import examples import examples
from core import priors
import likelihoods import likelihoods
import testing import testing
from numpy.testing import Tester from numpy.testing import Tester
from nose.tools import nottest from nose.tools import nottest
import kern
from core import priors
@nottest @nottest
def tests(): def tests():

View file

@ -4,7 +4,7 @@ from .. import kern
from ..util.plot import gpplot, Tango, x_frame1D, x_frame2D from ..util.plot import gpplot, Tango, x_frame1D, x_frame2D
import pylab as pb import pylab as pb
class GPBase(model): class GPBase(model.model):
""" """
Gaussian Process model for holding shared behaviour between Gaussian Process model for holding shared behaviour between
sprase_GP and GP models sprase_GP and GP models
@ -24,6 +24,9 @@ class GPBase(model):
self._Xmean = X.mean(0)[None, :] self._Xmean = X.mean(0)[None, :]
self._Xstd = X.std(0)[None, :] self._Xstd = X.std(0)[None, :]
self.X = (X.copy() - self._Xmean) / self._Xstd self.X = (X.copy() - self._Xmean) / self._Xstd
else:
self._Xmean = np.zeros((1,self.Q))
self._Xstd = np.ones((1,self.Q))
super(GPBase, self).__init__() super(GPBase, self).__init__()

View file

@ -21,13 +21,15 @@ def crescent_data(seed=default_seed): # FIXME
""" """
data = GPy.util.datasets.crescent_data(seed=seed) data = GPy.util.datasets.crescent_data(seed=seed)
Y = data['Y']
Y[Y.flatten()==-1] = 0
# Kernel object # Kernel object
kernel = GPy.kern.rbf(data['X'].shape[1]) kernel = GPy.kern.rbf(data['X'].shape[1])
# Likelihood object # Likelihood object
distribution = GPy.likelihoods.likelihood_functions.probit() distribution = GPy.likelihoods.likelihood_functions.binomial()
likelihood = GPy.likelihoods.EP(data['Y'], distribution) likelihood = GPy.likelihoods.EP(Y, distribution)
m = GPy.models.GP(data['X'], likelihood, kernel) m = GPy.models.GP(data['X'], likelihood, kernel)
@ -49,12 +51,15 @@ def oil():
Run a Gaussian process classification on the oil data. The demonstration calls the basic GP classification model and uses EP to approximate the likelihood. Run a Gaussian process classification on the oil data. The demonstration calls the basic GP classification model and uses EP to approximate the likelihood.
""" """
data = GPy.util.datasets.oil() data = GPy.util.datasets.oil()
Y = data['Y'][:, 0:1]
Y[Y.flatten()==-1] = 0
# Kernel object # Kernel object
kernel = GPy.kern.rbf(12) kernel = GPy.kern.rbf(12)
# Likelihood object # Likelihood object
distribution = GPy.likelihoods.likelihood_functions.probit() distribution = GPy.likelihoods.likelihood_functions.binomial()
likelihood = GPy.likelihoods.EP(data['Y'][:, 0:1], distribution) likelihood = GPy.likelihoods.EP(Y, distribution)
# Create GP model # Create GP model
m = GPy.models.GP(data['X'], likelihood=likelihood, kernel=kernel) m = GPy.models.GP(data['X'], likelihood=likelihood, kernel=kernel)
@ -79,12 +84,14 @@ def toy_linear_1d_classification(seed=default_seed):
data = GPy.util.datasets.toy_linear_1d_classification(seed=seed) data = GPy.util.datasets.toy_linear_1d_classification(seed=seed)
Y = data['Y'][:, 0:1] Y = data['Y'][:, 0:1]
Y[Y.flatten() == -1] = 0
# Kernel object # Kernel object
kernel = GPy.kern.rbf(1) kernel = GPy.kern.rbf(1)
# Likelihood object # Likelihood object
distribution = GPy.likelihoods.likelihood_functions.probit() link = GPy.likelihoods.link_functions.probit
distribution = GPy.likelihoods.likelihood_functions.binomial(link)
likelihood = GPy.likelihoods.EP(Y, distribution) likelihood = GPy.likelihoods.EP(Y, distribution)
# Model definition # Model definition
@ -115,12 +122,13 @@ def sparse_toy_linear_1d_classification(seed=default_seed):
data = GPy.util.datasets.toy_linear_1d_classification(seed=seed) data = GPy.util.datasets.toy_linear_1d_classification(seed=seed)
Y = data['Y'][:, 0:1] Y = data['Y'][:, 0:1]
Y[Y.flatten() == -1] = 0
# Kernel object # Kernel object
kernel = GPy.kern.rbf(1) + GPy.kern.white(1) kernel = GPy.kern.rbf(1) + GPy.kern.white(1)
# Likelihood object # Likelihood object
distribution = GPy.likelihoods.likelihood_functions.probit() distribution = GPy.likelihoods.likelihood_functions.binomial()
likelihood = GPy.likelihoods.EP(Y, distribution) likelihood = GPy.likelihoods.EP(Y, distribution)
Z = np.random.uniform(data['X'].min(), data['X'].max(), (10, 1)) Z = np.random.uniform(data['X'].min(), data['X'].max(), (10, 1))
@ -156,13 +164,15 @@ def sparse_crescent_data(inducing=10, seed=default_seed):
""" """
data = GPy.util.datasets.crescent_data(seed=seed) data = GPy.util.datasets.crescent_data(seed=seed)
Y = data['Y']
Y[Y.flatten()==-1]=0
# Kernel object # Kernel object
kernel = GPy.kern.rbf(data['X'].shape[1]) + GPy.kern.white(data['X'].shape[1]) kernel = GPy.kern.rbf(data['X'].shape[1]) + GPy.kern.white(data['X'].shape[1])
# Likelihood object # Likelihood object
distribution = GPy.likelihoods.likelihood_functions.probit() distribution = GPy.likelihoods.likelihood_functions.binomial()
likelihood = GPy.likelihoods.EP(data['Y'], distribution) likelihood = GPy.likelihoods.EP(Y, distribution)
sample = np.random.randint(0, data['X'].shape[0], inducing) sample = np.random.randint(0, data['X'].shape[0], inducing)
Z = data['X'][sample, :] Z = data['X'][sample, :]

View file

@ -20,6 +20,7 @@ class EP(likelihood):
self.N, self.D = self.data.shape self.N, self.D = self.data.shape
self.is_heteroscedastic = True self.is_heteroscedastic = True
self.Nparams = 0 self.Nparams = 0
self._transf_data = self.likelihood_function._preprocess_values(data)
#Initial values - Likelihood approximation parameters: #Initial values - Likelihood approximation parameters:
#p(y|f) = t(f|tau_tilde,v_tilde) #p(y|f) = t(f|tau_tilde,v_tilde)

View file

@ -8,19 +8,68 @@ import scipy as sp
import pylab as pb import pylab as pb
from ..util.plot import gpplot from ..util.plot import gpplot
from ..util.univariate_Gaussian import std_norm_pdf,std_norm_cdf from ..util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
import link_functions
class likelihood_function: class likelihood_function(object):
""" """
Likelihood class for doing Expectation propagation Likelihood class for doing Expectation propagation
:param Y: observed output (Nx1 numpy.darray) :param Y: observed output (Nx1 numpy.darray)
..Note:: Y values allowed depend on the likelihood_function used ..Note:: Y values allowed depend on the likelihood_function used
""" """
def __init__(self,location=0,scale=1): def __init__(self,link):
self.location = location if link == self._analytical:
self.scale = scale self.moments_match = self._moments_match_analytical
else:
assert isinstance(link,link_functions.link_function)
self.link = link
self.moments_match = self._moments_match_numerical
class probit(likelihood_function): def _preprocess_values(self,Y):
return Y
def _product(self,gp,obs,mu,sigma):
return stats.norm.pdf(gp,loc=mu,scale=sigma) * self._distribution(gp,obs)
def _nlog_product(self,gp,obs,mu,sigma):
return -(-.5*(gp-mu)**2/sigma**2 + self._log_distribution(gp,obs))
def _locate(self,obs,mu,sigma):
"""
Golden Search to find the mode in the _product function (cavity x exact likelihood) and define a grid around it for numerical integration
"""
golden_A = -1 if obs == 0 else np.array([np.log(obs),mu]).min() #Lower limit
golden_B = np.array([np.log(obs),mu]).max() #Upper limit
return sp.optimize.golden(self._nlog_product, args=(obs,mu,sigma), brack=(golden_A,golden_B)) #Better to work with _nlog_product than with _product
def _moments_match_numerical(self,obs,tau,v):
"""
Simpson's Rule is used to calculate the moments mumerically, it needs a grid of points as input.
"""
mu = v/tau
sigma = np.sqrt(1./tau)
opt = self._locate(obs,mu,sigma)
width = 3./np.log(max(obs,2))
A = opt - width #Grid's lower limit
B = opt + width #Grid's Upper limit
K = 10*int(np.log(max(obs,150))) #Number of points in the grid
h = (B-A)/K # length of the intervals
grid_x = np.hstack([np.linspace(opt-width,opt,K/2+1)[1:-1], np.linspace(opt,opt+width,K/2+1)]) # grid of points (X axis)
x = np.hstack([A,B,grid_x[range(1,K,2)],grid_x[range(2,K-1,2)]]) # grid_x rearranged, just to make Simpson's algorithm easier
_aux1 = self._product(A,obs,mu,sigma)
_aux2 = self._product(B,obs,mu,sigma)
_aux3 = 4*self._product(grid_x[range(1,K,2)],obs,mu,sigma)
_aux4 = 2*self._product(grid_x[range(2,K-1,2)],obs,mu,sigma)
zeroth = np.hstack((_aux1,_aux2,_aux3,_aux4)) # grid of points (Y axis) rearranged
first = zeroth*x
second = first*x
Z_hat = sum(zeroth)*h/3 # Zero-th moment
mu_hat = sum(first)*h/(3*Z_hat) # First moment
m2 = sum(second)*h/(3*Z_hat) # Second moment
sigma2_hat = m2 - mu_hat**2 # Second central moment
return float(Z_hat), float(mu_hat), float(sigma2_hat)
class binomial(likelihood_function):
""" """
Probit likelihood Probit likelihood
Y is expected to take values in {-1,1} Y is expected to take values in {-1,1}
@ -29,8 +78,33 @@ class probit(likelihood_function):
L(x) = \\Phi (Y_i*f_i) L(x) = \\Phi (Y_i*f_i)
$$ $$
""" """
def __init__(self,link=None):
self._analytical = link_functions.probit
if not link:
link = self._analytical
super(binomial, self).__init__(link)
def moments_match(self,data_i,tau_i,v_i): def _distribution(self,gp,obs):
pass
def _log_distribution(self,gp,obs):
pass
def _preprocess_values(self,Y):
"""
Check if the values of the observations correspond to the values
assumed by the likelihood function.
..Note:: Binary classification algorithm works better with classes {-1,1}
"""
Y_prep = Y.copy()
Y1 = Y[Y.flatten()==1].size
Y2 = Y[Y.flatten()==0].size
assert Y1 + Y2 == Y.size, 'Binomial likelihood is meant to be used only with outputs in {0,1}.'
Y_prep[Y.flatten() == 0] = -1
return Y_prep
def _moments_match_analytical(self,data_i,tau_i,v_i):
""" """
Moments match of the marginal approximation in EP algorithm Moments match of the marginal approximation in EP algorithm
@ -38,8 +112,6 @@ class probit(likelihood_function):
:param tau_i: precision of the cavity distribution (float) :param tau_i: precision of the cavity distribution (float)
:param v_i: mean/variance of the cavity distribution (float) :param v_i: mean/variance of the cavity distribution (float)
""" """
#if data_i == 0: data_i = -1 #NOTE Binary classification algorithm works better with classes {-1,1}, 1D-plotting works better with classes {0,1}.
# TODO: some version of assert
z = data_i*v_i/np.sqrt(tau_i**2 + tau_i) z = data_i*v_i/np.sqrt(tau_i**2 + tau_i)
Z_hat = std_norm_cdf(z) Z_hat = std_norm_cdf(z)
phi = std_norm_pdf(z) phi = std_norm_pdf(z)
@ -50,6 +122,8 @@ class probit(likelihood_function):
def predictive_values(self,mu,var): def predictive_values(self,mu,var):
""" """
Compute mean, variance and conficence interval (percentiles 5 and 95) of the prediction Compute mean, variance and conficence interval (percentiles 5 and 95) of the prediction
:param mu: mean of the latent variable
:param var: variance of the latent variable
""" """
mu = mu.flatten() mu = mu.flatten()
var = var.flatten() var = var.flatten()
@ -69,68 +143,23 @@ class Poisson(likelihood_function):
L(x) = \exp(\lambda) * \lambda**Y_i / Y_i! L(x) = \exp(\lambda) * \lambda**Y_i / Y_i!
$$ $$
""" """
def moments_match(self,data_i,tau_i,v_i): def __init__(self,link=None):
""" self._analytical = None
Moments match of the marginal approximation in EP algorithm if not link:
link = link_functions.log()
super(Poisson, self).__init__(link)
:param i: number of observation (int) def _distribution(self,gp,obs):
:param tau_i: precision of the cavity distribution (float) return stats.poisson.pmf(obs,self.link.inv_transf(gp))
:param v_i: mean/variance of the cavity distribution (float)
"""
mu = v_i/tau_i
sigma = np.sqrt(1./tau_i)
def poisson_norm(f):
"""
Product of the likelihood and the cavity distribution
"""
pdf_norm_f = stats.norm.pdf(f,loc=mu,scale=sigma)
rate = np.exp( (f*self.scale)+self.location)
poisson = stats.poisson.pmf(float(data_i),rate)
return pdf_norm_f*poisson
def log_pnm(f): def _log_distribution(self,gp,obs):
""" return - self.link.inv_transf(gp) + obs * self.link.log_inv_transf(gp)
Log of poisson_norm
"""
return -(-.5*(f-mu)**2/sigma**2 - np.exp( (f*self.scale)+self.location) + ( (f*self.scale)+self.location)*data_i)
"""
Golden Search and Simpson's Rule
--------------------------------
Simpson's Rule is used to calculate the moments mumerically, it needs a grid of points as input.
Golden Search is used to find the mode in the poisson_norm distribution and define around it the grid for Simpson's Rule
"""
#TODO golden search & simpson's rule can be defined in the general likelihood class, rather than in each specific case.
#Golden search
golden_A = -1 if data_i == 0 else np.array([np.log(data_i),mu]).min() #Lower limit
golden_B = np.array([np.log(data_i),mu]).max() #Upper limit
golden_A = (golden_A - self.location)/self.scale
golden_B = (golden_B - self.location)/self.scale
opt = sp.optimize.golden(log_pnm,brack=(golden_A,golden_B)) #Better to work with log_pnm than with poisson_norm
# Simpson's approximation
width = 3./np.log(max(data_i,2))
A = opt - width #Lower limit
B = opt + width #Upper limit
K = 10*int(np.log(max(data_i,150))) #Number of points in the grid, we DON'T want K to be the same number for every case
h = (B-A)/K # length of the intervals
grid_x = np.hstack([np.linspace(opt-width,opt,K/2+1)[1:-1], np.linspace(opt,opt+width,K/2+1)]) # grid of points (X axis)
x = np.hstack([A,B,grid_x[range(1,K,2)],grid_x[range(2,K-1,2)]]) # grid_x rearranged, just to make Simpson's algorithm easier
zeroth = np.hstack([poisson_norm(A),poisson_norm(B),[4*poisson_norm(f) for f in grid_x[range(1,K,2)]],[2*poisson_norm(f) for f in grid_x[range(2,K-1,2)]]]) # grid of points (Y axis) rearranged like x
first = zeroth*x
second = first*x
Z_hat = sum(zeroth)*h/3 # Zero-th moment
mu_hat = sum(first)*h/(3*Z_hat) # First moment
m2 = sum(second)*h/(3*Z_hat) # Second moment
sigma2_hat = m2 - mu_hat**2 # Second central moment
return float(Z_hat), float(mu_hat), float(sigma2_hat)
def predictive_values(self,mu,var): def predictive_values(self,mu,var):
""" """
Compute mean, and conficence interval (percentiles 5 and 95) of the prediction Compute mean, and conficence interval (percentiles 5 and 95) of the prediction
""" """
mean = np.exp(mu*self.scale + self.location) mean = self.link.transf(mu)#np.exp(mu*self.scale + self.location)
tmp = stats.poisson.ppf(np.array([.025,.975]),mean) tmp = stats.poisson.ppf(np.array([.025,.975]),mean)
p_025 = tmp[:,0] p_025 = tmp[:,0]
p_975 = tmp[:,1] p_975 = tmp[:,1]

View file

@ -0,0 +1,58 @@
# Copyright (c) 2012, 2013 Ricardo Andrade
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from scipy import stats
import scipy as sp
import pylab as pb
from ..util.plot import gpplot
from ..util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
class link_function(object):
"""
Link function class for doing non-Gaussian likelihoods approximation
:param Y: observed output (Nx1 numpy.darray)
..Note:: Y values allowed depend on the likelihood_function used
"""
def __init__(self):
pass
class identity(link_function):
def transf(self,mu):
return mu
def inv_transf(self,f):
return f
def log_inv_transf(self,f):
return np.log(f)
class log(link_function):
def transf(self,mu):
return np.log(mu)
def inv_transf(self,f):
return np.exp(f)
def log_inv_transf(self,f):
return f
class log_ex_1(link_function):
def transf(self,mu):
return np.log(np.exp(mu) - 1)
def inv_transf(self,f):
return np.log(np.exp(f)+1)
def log_inv_tranf(self,f):
return np.log(np.log(np.exp(f)+1))
class probit(link_function):
pass

View file

@ -31,5 +31,5 @@ class GP_regression(GP):
likelihood = likelihoods.Gaussian(Y,normalize=normalize_Y) likelihood = likelihoods.Gaussian(Y,normalize=normalize_Y)
super(GP_regression, self).__init__(self, X, likelihood, kernel, normalize_X=normalize_X) super(GP_regression, self).__init__(X, likelihood, kernel, normalize_X=normalize_X)
self._set_params(self._get_params()) self._set_params(self._get_params())

View file

@ -8,7 +8,7 @@ import sys, pdb
# from .. import kern # from .. import kern
# from ..core import model # from ..core import model
# from ..util.linalg import pdinv, PCA # from ..util.linalg import pdinv, PCA
from ..core import GPLVM from GPLVM import GPLVM
from sparse_GP_regression import sparse_GP_regression from sparse_GP_regression import sparse_GP_regression
class sparse_GPLVM(sparse_GP_regression, GPLVM): class sparse_GPLVM(sparse_GP_regression, GPLVM):