Merge branch 'devel' of github.com:SheffieldML/GPy into devel

This commit is contained in:
Max Zwiessele 2013-06-04 16:50:04 +01:00
commit dbc4bc3f3c
8 changed files with 178 additions and 76 deletions

View file

@ -2,16 +2,17 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import kern
import core
import models
import inference
import util
import examples
from core import priors
import likelihoods
import testing
from numpy.testing import Tester
from nose.tools import nottest
import kern
from core import priors
@nottest
def tests():

View file

@ -4,7 +4,7 @@ from .. import kern
from ..util.plot import gpplot, Tango, x_frame1D, x_frame2D
import pylab as pb
class GPBase(model):
class GPBase(model.model):
"""
Gaussian Process model for holding shared behaviour between
sprase_GP and GP models
@ -24,6 +24,9 @@ class GPBase(model):
self._Xmean = X.mean(0)[None, :]
self._Xstd = X.std(0)[None, :]
self.X = (X.copy() - self._Xmean) / self._Xstd
else:
self._Xmean = np.zeros((1,self.Q))
self._Xstd = np.ones((1,self.Q))
super(GPBase, self).__init__()

View file

@ -21,13 +21,15 @@ def crescent_data(seed=default_seed): # FIXME
"""
data = GPy.util.datasets.crescent_data(seed=seed)
Y = data['Y']
Y[Y.flatten()==-1] = 0
# Kernel object
kernel = GPy.kern.rbf(data['X'].shape[1])
# Likelihood object
distribution = GPy.likelihoods.likelihood_functions.probit()
likelihood = GPy.likelihoods.EP(data['Y'], distribution)
distribution = GPy.likelihoods.likelihood_functions.binomial()
likelihood = GPy.likelihoods.EP(Y, distribution)
m = GPy.models.GP(data['X'], likelihood, kernel)
@ -49,12 +51,15 @@ def oil():
Run a Gaussian process classification on the oil data. The demonstration calls the basic GP classification model and uses EP to approximate the likelihood.
"""
data = GPy.util.datasets.oil()
Y = data['Y'][:, 0:1]
Y[Y.flatten()==-1] = 0
# Kernel object
kernel = GPy.kern.rbf(12)
# Likelihood object
distribution = GPy.likelihoods.likelihood_functions.probit()
likelihood = GPy.likelihoods.EP(data['Y'][:, 0:1], distribution)
distribution = GPy.likelihoods.likelihood_functions.binomial()
likelihood = GPy.likelihoods.EP(Y, distribution)
# Create GP model
m = GPy.models.GP(data['X'], likelihood=likelihood, kernel=kernel)
@ -79,12 +84,14 @@ def toy_linear_1d_classification(seed=default_seed):
data = GPy.util.datasets.toy_linear_1d_classification(seed=seed)
Y = data['Y'][:, 0:1]
Y[Y.flatten() == -1] = 0
# Kernel object
kernel = GPy.kern.rbf(1)
# Likelihood object
distribution = GPy.likelihoods.likelihood_functions.probit()
link = GPy.likelihoods.link_functions.probit
distribution = GPy.likelihoods.likelihood_functions.binomial(link)
likelihood = GPy.likelihoods.EP(Y, distribution)
# Model definition
@ -115,12 +122,13 @@ def sparse_toy_linear_1d_classification(seed=default_seed):
data = GPy.util.datasets.toy_linear_1d_classification(seed=seed)
Y = data['Y'][:, 0:1]
Y[Y.flatten() == -1] = 0
# Kernel object
kernel = GPy.kern.rbf(1) + GPy.kern.white(1)
# Likelihood object
distribution = GPy.likelihoods.likelihood_functions.probit()
distribution = GPy.likelihoods.likelihood_functions.binomial()
likelihood = GPy.likelihoods.EP(Y, distribution)
Z = np.random.uniform(data['X'].min(), data['X'].max(), (10, 1))
@ -156,13 +164,15 @@ def sparse_crescent_data(inducing=10, seed=default_seed):
"""
data = GPy.util.datasets.crescent_data(seed=seed)
Y = data['Y']
Y[Y.flatten()==-1]=0
# Kernel object
kernel = GPy.kern.rbf(data['X'].shape[1]) + GPy.kern.white(data['X'].shape[1])
# Likelihood object
distribution = GPy.likelihoods.likelihood_functions.probit()
likelihood = GPy.likelihoods.EP(data['Y'], distribution)
distribution = GPy.likelihoods.likelihood_functions.binomial()
likelihood = GPy.likelihoods.EP(Y, distribution)
sample = np.random.randint(0, data['X'].shape[0], inducing)
Z = data['X'][sample, :]

View file

@ -20,6 +20,7 @@ class EP(likelihood):
self.N, self.D = self.data.shape
self.is_heteroscedastic = True
self.Nparams = 0
self._transf_data = self.likelihood_function._preprocess_values(data)
#Initial values - Likelihood approximation parameters:
#p(y|f) = t(f|tau_tilde,v_tilde)

View file

@ -8,19 +8,68 @@ import scipy as sp
import pylab as pb
from ..util.plot import gpplot
from ..util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
import link_functions
class likelihood_function:
class likelihood_function(object):
"""
Likelihood class for doing Expectation propagation
:param Y: observed output (Nx1 numpy.darray)
..Note:: Y values allowed depend on the likelihood_function used
"""
def __init__(self,location=0,scale=1):
self.location = location
self.scale = scale
def __init__(self,link):
if link == self._analytical:
self.moments_match = self._moments_match_analytical
else:
assert isinstance(link,link_functions.link_function)
self.link = link
self.moments_match = self._moments_match_numerical
class probit(likelihood_function):
def _preprocess_values(self,Y):
return Y
def _product(self,gp,obs,mu,sigma):
return stats.norm.pdf(gp,loc=mu,scale=sigma) * self._distribution(gp,obs)
def _nlog_product(self,gp,obs,mu,sigma):
return -(-.5*(gp-mu)**2/sigma**2 + self._log_distribution(gp,obs))
def _locate(self,obs,mu,sigma):
"""
Golden Search to find the mode in the _product function (cavity x exact likelihood) and define a grid around it for numerical integration
"""
golden_A = -1 if obs == 0 else np.array([np.log(obs),mu]).min() #Lower limit
golden_B = np.array([np.log(obs),mu]).max() #Upper limit
return sp.optimize.golden(self._nlog_product, args=(obs,mu,sigma), brack=(golden_A,golden_B)) #Better to work with _nlog_product than with _product
def _moments_match_numerical(self,obs,tau,v):
"""
Simpson's Rule is used to calculate the moments mumerically, it needs a grid of points as input.
"""
mu = v/tau
sigma = np.sqrt(1./tau)
opt = self._locate(obs,mu,sigma)
width = 3./np.log(max(obs,2))
A = opt - width #Grid's lower limit
B = opt + width #Grid's Upper limit
K = 10*int(np.log(max(obs,150))) #Number of points in the grid
h = (B-A)/K # length of the intervals
grid_x = np.hstack([np.linspace(opt-width,opt,K/2+1)[1:-1], np.linspace(opt,opt+width,K/2+1)]) # grid of points (X axis)
x = np.hstack([A,B,grid_x[range(1,K,2)],grid_x[range(2,K-1,2)]]) # grid_x rearranged, just to make Simpson's algorithm easier
_aux1 = self._product(A,obs,mu,sigma)
_aux2 = self._product(B,obs,mu,sigma)
_aux3 = 4*self._product(grid_x[range(1,K,2)],obs,mu,sigma)
_aux4 = 2*self._product(grid_x[range(2,K-1,2)],obs,mu,sigma)
zeroth = np.hstack((_aux1,_aux2,_aux3,_aux4)) # grid of points (Y axis) rearranged
first = zeroth*x
second = first*x
Z_hat = sum(zeroth)*h/3 # Zero-th moment
mu_hat = sum(first)*h/(3*Z_hat) # First moment
m2 = sum(second)*h/(3*Z_hat) # Second moment
sigma2_hat = m2 - mu_hat**2 # Second central moment
return float(Z_hat), float(mu_hat), float(sigma2_hat)
class binomial(likelihood_function):
"""
Probit likelihood
Y is expected to take values in {-1,1}
@ -29,8 +78,33 @@ class probit(likelihood_function):
L(x) = \\Phi (Y_i*f_i)
$$
"""
def __init__(self,link=None):
self._analytical = link_functions.probit
if not link:
link = self._analytical
super(binomial, self).__init__(link)
def moments_match(self,data_i,tau_i,v_i):
def _distribution(self,gp,obs):
pass
def _log_distribution(self,gp,obs):
pass
def _preprocess_values(self,Y):
"""
Check if the values of the observations correspond to the values
assumed by the likelihood function.
..Note:: Binary classification algorithm works better with classes {-1,1}
"""
Y_prep = Y.copy()
Y1 = Y[Y.flatten()==1].size
Y2 = Y[Y.flatten()==0].size
assert Y1 + Y2 == Y.size, 'Binomial likelihood is meant to be used only with outputs in {0,1}.'
Y_prep[Y.flatten() == 0] = -1
return Y_prep
def _moments_match_analytical(self,data_i,tau_i,v_i):
"""
Moments match of the marginal approximation in EP algorithm
@ -38,8 +112,6 @@ class probit(likelihood_function):
:param tau_i: precision of the cavity distribution (float)
:param v_i: mean/variance of the cavity distribution (float)
"""
#if data_i == 0: data_i = -1 #NOTE Binary classification algorithm works better with classes {-1,1}, 1D-plotting works better with classes {0,1}.
# TODO: some version of assert
z = data_i*v_i/np.sqrt(tau_i**2 + tau_i)
Z_hat = std_norm_cdf(z)
phi = std_norm_pdf(z)
@ -50,6 +122,8 @@ class probit(likelihood_function):
def predictive_values(self,mu,var):
"""
Compute mean, variance and conficence interval (percentiles 5 and 95) of the prediction
:param mu: mean of the latent variable
:param var: variance of the latent variable
"""
mu = mu.flatten()
var = var.flatten()
@ -69,68 +143,23 @@ class Poisson(likelihood_function):
L(x) = \exp(\lambda) * \lambda**Y_i / Y_i!
$$
"""
def moments_match(self,data_i,tau_i,v_i):
"""
Moments match of the marginal approximation in EP algorithm
def __init__(self,link=None):
self._analytical = None
if not link:
link = link_functions.log()
super(Poisson, self).__init__(link)
:param i: number of observation (int)
:param tau_i: precision of the cavity distribution (float)
:param v_i: mean/variance of the cavity distribution (float)
"""
mu = v_i/tau_i
sigma = np.sqrt(1./tau_i)
def poisson_norm(f):
"""
Product of the likelihood and the cavity distribution
"""
pdf_norm_f = stats.norm.pdf(f,loc=mu,scale=sigma)
rate = np.exp( (f*self.scale)+self.location)
poisson = stats.poisson.pmf(float(data_i),rate)
return pdf_norm_f*poisson
def _distribution(self,gp,obs):
return stats.poisson.pmf(obs,self.link.inv_transf(gp))
def log_pnm(f):
"""
Log of poisson_norm
"""
return -(-.5*(f-mu)**2/sigma**2 - np.exp( (f*self.scale)+self.location) + ( (f*self.scale)+self.location)*data_i)
"""
Golden Search and Simpson's Rule
--------------------------------
Simpson's Rule is used to calculate the moments mumerically, it needs a grid of points as input.
Golden Search is used to find the mode in the poisson_norm distribution and define around it the grid for Simpson's Rule
"""
#TODO golden search & simpson's rule can be defined in the general likelihood class, rather than in each specific case.
#Golden search
golden_A = -1 if data_i == 0 else np.array([np.log(data_i),mu]).min() #Lower limit
golden_B = np.array([np.log(data_i),mu]).max() #Upper limit
golden_A = (golden_A - self.location)/self.scale
golden_B = (golden_B - self.location)/self.scale
opt = sp.optimize.golden(log_pnm,brack=(golden_A,golden_B)) #Better to work with log_pnm than with poisson_norm
# Simpson's approximation
width = 3./np.log(max(data_i,2))
A = opt - width #Lower limit
B = opt + width #Upper limit
K = 10*int(np.log(max(data_i,150))) #Number of points in the grid, we DON'T want K to be the same number for every case
h = (B-A)/K # length of the intervals
grid_x = np.hstack([np.linspace(opt-width,opt,K/2+1)[1:-1], np.linspace(opt,opt+width,K/2+1)]) # grid of points (X axis)
x = np.hstack([A,B,grid_x[range(1,K,2)],grid_x[range(2,K-1,2)]]) # grid_x rearranged, just to make Simpson's algorithm easier
zeroth = np.hstack([poisson_norm(A),poisson_norm(B),[4*poisson_norm(f) for f in grid_x[range(1,K,2)]],[2*poisson_norm(f) for f in grid_x[range(2,K-1,2)]]]) # grid of points (Y axis) rearranged like x
first = zeroth*x
second = first*x
Z_hat = sum(zeroth)*h/3 # Zero-th moment
mu_hat = sum(first)*h/(3*Z_hat) # First moment
m2 = sum(second)*h/(3*Z_hat) # Second moment
sigma2_hat = m2 - mu_hat**2 # Second central moment
return float(Z_hat), float(mu_hat), float(sigma2_hat)
def _log_distribution(self,gp,obs):
return - self.link.inv_transf(gp) + obs * self.link.log_inv_transf(gp)
def predictive_values(self,mu,var):
"""
Compute mean, and conficence interval (percentiles 5 and 95) of the prediction
"""
mean = np.exp(mu*self.scale + self.location)
mean = self.link.transf(mu)#np.exp(mu*self.scale + self.location)
tmp = stats.poisson.ppf(np.array([.025,.975]),mean)
p_025 = tmp[:,0]
p_975 = tmp[:,1]

View file

@ -0,0 +1,58 @@
# Copyright (c) 2012, 2013 Ricardo Andrade
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from scipy import stats
import scipy as sp
import pylab as pb
from ..util.plot import gpplot
from ..util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
class link_function(object):
"""
Link function class for doing non-Gaussian likelihoods approximation
:param Y: observed output (Nx1 numpy.darray)
..Note:: Y values allowed depend on the likelihood_function used
"""
def __init__(self):
pass
class identity(link_function):
def transf(self,mu):
return mu
def inv_transf(self,f):
return f
def log_inv_transf(self,f):
return np.log(f)
class log(link_function):
def transf(self,mu):
return np.log(mu)
def inv_transf(self,f):
return np.exp(f)
def log_inv_transf(self,f):
return f
class log_ex_1(link_function):
def transf(self,mu):
return np.log(np.exp(mu) - 1)
def inv_transf(self,f):
return np.log(np.exp(f)+1)
def log_inv_tranf(self,f):
return np.log(np.log(np.exp(f)+1))
class probit(link_function):
pass

View file

@ -31,5 +31,5 @@ class GP_regression(GP):
likelihood = likelihoods.Gaussian(Y,normalize=normalize_Y)
super(GP_regression, self).__init__(self, X, likelihood, kernel, normalize_X=normalize_X)
super(GP_regression, self).__init__(X, likelihood, kernel, normalize_X=normalize_X)
self._set_params(self._get_params())

View file

@ -8,7 +8,7 @@ import sys, pdb
# from .. import kern
# from ..core import model
# from ..util.linalg import pdinv, PCA
from ..core import GPLVM
from GPLVM import GPLVM
from sparse_GP_regression import sparse_GP_regression
class sparse_GPLVM(sparse_GP_regression, GPLVM):