Merge branch 'newGP' of github.com:SheffieldML/GPy into newGP

James Hensman 2013-02-01 17:13:10 +00:00
commit f7d2fc6ca4
7 changed files with 147 additions and 184 deletions

View file

@@ -3,16 +3,15 @@
"""
Simple Gaussian Processes classification
Gaussian Processes classification
"""
import pylab as pb
import numpy as np
import GPy
default_seed=10000
######################################
## 2 dimensional example
def crescent_data(model_type='Full', inducing=10, seed=default_seed):
def crescent_data(model_type='Full', inducing=10, seed=default_seed): #FIXME
"""Run a Gaussian process classification on the crescent data. The demonstration calls the basic GP classification model and uses EP to approximate the likelihood.
:param model_type: type of model to fit ['Full', 'FITC', 'DTC'].
@@ -30,7 +29,7 @@ def crescent_data(model_type='Full', inducing=10, seed=default_seed):
# create sparse GP EP model
m = GPy.models.sparse_GP_EP(data['X'],likelihood=likelihood,inducing=inducing,ep_proxy=model_type)
m.approximate_likelihood()
m.update_likelihood_approximation()
print(m)
# optimize
@@ -42,53 +41,66 @@ def crescent_data(model_type='Full', inducing=10, seed=default_seed):
return m
def oil():
"""Run a Gaussian process classification on the oil data. The demonstration calls the basic GP classification model and uses EP to approximate the likelihood."""
"""
Run a Gaussian process classification on the oil data. The demonstration calls the basic GP classification model and uses EP to approximate the likelihood.
"""
data = GPy.util.datasets.oil()
likelihood = GPy.inference.likelihoods.probit(data['Y'][:, 0:1])
# Kernel object
kernel = GPy.kern.rbf(12)
# create simple GP model
m = GPy.models.GP_EP(data['X'],likelihood)
# Likelihood object
distribution = GPy.likelihoods.likelihood_functions.probit()
likelihood = GPy.likelihoods.EP(data['Y'][:, 0:1],distribution)
# constrain all parameters to be positive
# Create GP model
m = GPy.models.GP(data['X'],kernel,likelihood=likelihood)
# Constrain all parameters to be positive
m.constrain_positive('')
m.tie_param('lengthscale')
m.approximate_likelihood()
m.update_likelihood_approximation()
# optimize
# Optimize
m.optimize()
# plot
#m.plot()
print(m)
return m
def toy_linear_1d_classification(model_type='Full', inducing=4, seed=default_seed):
"""Simple 1D classification example.
:param model_type: type of model to fit ['Full', 'FITC', 'DTC'].
def toy_linear_1d_classification(seed=default_seed):
"""
Simple 1D classification example
:param seed: seed value for data generation (default is default_seed, i.e. 10000).
:type seed: int
:param inducing : number of inducing variables (only used for 'FITC' or 'DTC').
:type inducing: int
"""
data = GPy.util.datasets.toy_linear_1d_classification(seed=seed)
likelihood = GPy.inference.likelihoods.probit(data['Y'][:, 0:1])
assert model_type in ('Full','DTC','FITC')
# create simple GP model
if model_type=='Full':
m = GPy.models.GP_EP(data['X'],likelihood)
else:
# create sparse GP EP model
m = GPy.models.sparse_GP_EP(data['X'],likelihood=likelihood,inducing=inducing,ep_proxy=model_type)
# Kernel object
kernel = GPy.kern.rbf(1)
m.constrain_positive('var')
m.constrain_positive('len')
m.tie_param('lengthscale')
m.approximate_likelihood()
# Likelihood object
distribution = GPy.likelihoods.likelihood_functions.probit()
likelihood = GPy.likelihoods.EP(data['Y'][:, 0:1],distribution)
# Optimize and plot
m.em(plot_all=False) # EM algorithm
m.plot()
# Model definition
m = GPy.models.GP(data['X'],kernel,likelihood=likelihood)
# Optimize
"""
EPEM runs a loop that consists of two steps:
1) EP likelihood approximation:
m.update_likelihood_approximation()
2) Parameter optimization:
m.optimize()
"""
m.EPEM()
# Plot
pb.subplot(211)
m.plot_GP()
pb.subplot(212)
m.plot_output()
print(m)
return m
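
For readers following the API change in this demo file, here is a minimal sketch of the new classification workflow. It assumes the GPy snapshot in this commit; every call (dataset helper, probit/EP likelihood objects, GP constructor, EPEM, plotting) is taken from the demo code above, and the commented-out manual loop mirrors the EPEM description in toy_linear_1d_classification.

import pylab as pb
import GPy

# Build the model exactly as in toy_linear_1d_classification above.
data = GPy.util.datasets.toy_linear_1d_classification(seed=10000)
kernel = GPy.kern.rbf(1)
distribution = GPy.likelihoods.likelihood_functions.probit()
likelihood = GPy.likelihoods.EP(data['Y'][:, 0:1], distribution)
m = GPy.models.GP(data['X'], kernel, likelihood=likelihood)
m.constrain_positive('')

# Either let EPEM alternate the two steps ...
m.EPEM()
# ... or run the loop by hand, as the comment block above describes:
# for it in range(5):
#     m.update_likelihood_approximation()  # 1) EP likelihood approximation
#     m.optimize()                         # 2) parameter optimization

pb.subplot(211); m.plot_GP()      # latent GP view
pb.subplot(212); m.plot_output()  # predictions pushed through the likelihood
print(m)
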

View file

@@ -1,43 +0,0 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
"""
Simple Gaussian Processes classification 1D
probit likelihood
"""
import pylab as pb
import numpy as np
import GPy
pb.ion()
pb.close('all')
# Inputs
N = 30
X1 = np.random.normal(5,2,N/2)
X2 = np.random.normal(10,2,N/2)
X = np.hstack([X1,X2])[:,None]
# Outputs
Y = np.hstack([np.ones(N/2),np.repeat(-1,N/2)])[:,None]
# Kernel object
kernel = GPy.kern.rbf(1)
# Define likelihood
distribution = GPy.likelihoods.likelihood_functions.probit()
likelihood_object = GPy.likelihoods.EP(Y,distribution)
# Model definition
m = GPy.models.GP(X,kernel,likelihood=likelihood_object)
m.ensure_default_constraints()
m.update_likelihood_approximation()
#m.checkgrad(verbose=1)
m.optimize()
print "Round 2"
#rm.update_likelihood_approximation()
#m.EPEM()
m.plot()
#print(m)

View file

@@ -67,8 +67,8 @@ class EP(likelihood):
self.tau_tilde = np.zeros(self.N)
self.v_tilde = np.zeros(self.N)
#Initial values - Posterior distribution parameters: q(f|X,Y) = N(f|mu,Sigma)
self.mu = np.zeros(self.N)
self.Sigma = K.copy()
mu = np.zeros(self.N)
Sigma = K.copy()
"""
Initial values - Cavity distribution parameters:
@@ -96,29 +96,27 @@ class EP(likelihood):
update_order = np.random.permutation(self.N)
for i in update_order:
#Cavity distribution parameters
self.tau_[i] = 1./self.Sigma[i,i] - self.eta*self.tau_tilde[i]
self.v_[i] = self.mu[i]/self.Sigma[i,i] - self.eta*self.v_tilde[i]
print 1./self.Sigma[i,i],self.tau_tilde[i]
self.tau_[i] = 1./Sigma[i,i] - self.eta*self.tau_tilde[i]
self.v_[i] = mu[i]/Sigma[i,i] - self.eta*self.v_tilde[i]
#Marginal moments
self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.likelihood_function.moments_match(self.data[i],self.tau_[i],self.v_[i])
#Site parameters update
Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./self.Sigma[i,i])
Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - self.mu[i]/self.Sigma[i,i])
Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma[i,i])
Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma[i,i])
self.tau_tilde[i] = self.tau_tilde[i] + Delta_tau
self.v_tilde[i] = self.v_tilde[i] + Delta_v
#Posterior distribution parameters update
si=self.Sigma[:,i].reshape(self.N,1)
self.Sigma = self.Sigma - Delta_tau/(1.+ Delta_tau*self.Sigma[i,i])*np.dot(si,si.T)
self.mu = np.dot(self.Sigma,self.v_tilde)
si=Sigma[:,i].reshape(self.N,1)
Sigma = Sigma - Delta_tau/(1.+ Delta_tau*Sigma[i,i])*np.dot(si,si.T)
mu = np.dot(Sigma,self.v_tilde)
self.iterations += 1
print self.tau_tilde[i]
#Sigma recomputation with Cholesky decomposition
Sroot_tilde_K = np.sqrt(self.tau_tilde)[:,None]*K
B = np.eye(self.N) + np.sqrt(self.tau_tilde)[None,:]*Sroot_tilde_K
L = jitchol(B)
V,info = linalg.flapack.dtrtrs(L,Sroot_tilde_K,lower=1)
self.Sigma = K - np.dot(V.T,V)
self.mu = np.dot(self.Sigma,self.v_tilde)
Sigma = K - np.dot(V.T,V)
mu = np.dot(Sigma,self.v_tilde)
epsilon_np1 = sum((self.tau_tilde-self.np1[-1])**2)/self.N
epsilon_np2 = sum((self.v_tilde-self.np2[-1])**2)/self.N
self.np1.append(self.tau_tilde.copy())
@@ -141,7 +139,7 @@ class EP(likelihood):
"""
Kmmi, Lm, Lmi, Kmm_logdet = pdinv(Kmm)
KmnKnm = np.dot(Kmn, Kmn.T)
KmmiKmn = np.dot(Kmmi,self.Kmn)
KmmiKmn = np.dot(Kmmi,Kmn)
Qnn_diag = np.sum(Kmn*KmmiKmn,-2)
LLT0 = Kmm.copy()
@@ -223,13 +221,13 @@ class EP(likelihood):
q(f|X) = int N(f|Kfu Kuu_inv u, diag(Kff-Qff)) N(u|0,Kuu) du = N(f|0,Sigma0)
Sigma0 = diag(Knn-Qnn) + Qnn, Qnn = Knm*Kmmi*Kmn
"""
self.Kmmi, self.Lm, self.Lmi, self.Kmm_logdet = pdinv(self.Kmm)
self.P0 = self.Kmn.T
self.KmnKnm = np.dot(self.P0.T, self.P0)
self.KmmiKmn = np.dot(self.Kmmi,self.P0.T)
self.Qnn_diag = np.sum(self.P0.T*self.KmmiKmn,-2)
self.Diag0 = self.Knn_diag - self.Qnn_diag
self.R0 = jitchol(self.Kmmi).T
Kmmi, self.Lm, self.Lmi, Kmm_logdet = pdinv(Kmm)
P0 = Kmn.T
KmnKnm = np.dot(P0.T, P0)
KmmiKmn = np.dot(Kmmi,P0.T)
Qnn_diag = np.sum(P0.T*KmmiKmn,-2)
Diag0 = Knn_diag - Qnn_diag
R0 = jitchol(Kmmi).T
"""
Posterior approximation: q(f|y) = N(f| mu, Sigma)
@@ -238,11 +236,11 @@ class EP(likelihood):
"""
self.w = np.zeros(self.N)
self.gamma = np.zeros(self.M)
self.mu = np.zeros(self.N)
self.P = self.P0.copy()
self.R = self.R0.copy()
self.Diag = self.Diag0.copy()
self.Sigma_diag = self.Knn_diag
mu = np.zeros(self.N)
P = P0.copy()
R = R0.copy()
Diag = Diag0.copy()
Sigma_diag = Knn_diag
"""
Initial values - Cavity distribution parameters:
@@ -270,41 +268,41 @@ class EP(likelihood):
update_order = np.random.permutation(self.N)
for i in update_order:
#Cavity distribution parameters
self.tau_[i] = 1./self.Sigma_diag[i] - self.eta*self.tau_tilde[i]
self.v_[i] = self.mu[i]/self.Sigma_diag[i] - self.eta*self.v_tilde[i]
self.tau_[i] = 1./Sigma_diag[i] - self.eta*self.tau_tilde[i]
self.v_[i] = mu[i]/Sigma_diag[i] - self.eta*self.v_tilde[i]
#Marginal moments
self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.likelihood_function.moments_match(data[i],self.tau_[i],self.v_[i])
#Site parameters update
Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./self.Sigma_diag[i])
Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - self.mu[i]/self.Sigma_diag[i])
Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i])
Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i])
self.tau_tilde[i] = self.tau_tilde[i] + Delta_tau
self.v_tilde[i] = self.v_tilde[i] + Delta_v
#Posterior distribution parameters update
dtd1 = Delta_tau*self.Diag[i] + 1.
dii = self.Diag[i]
self.Diag[i] = dii - (Delta_tau * dii**2.)/dtd1
pi_ = self.P[i,:].reshape(1,self.M)
self.P[i,:] = pi_ - (Delta_tau*dii)/dtd1 * pi_
Rp_i = np.dot(self.R,pi_.T)
RTR = np.dot(self.R.T,np.dot(np.eye(self.M) - Delta_tau/(1.+Delta_tau*self.Sigma_diag[i]) * np.dot(Rp_i,Rp_i.T),self.R))
self.R = jitchol(RTR).T
dtd1 = Delta_tau*Diag[i] + 1.
dii = Diag[i]
Diag[i] = dii - (Delta_tau * dii**2.)/dtd1
pi_ = P[i,:].reshape(1,self.M)
P[i,:] = pi_ - (Delta_tau*dii)/dtd1 * pi_
Rp_i = np.dot(R,pi_.T)
RTR = np.dot(R.T,np.dot(np.eye(self.M) - Delta_tau/(1.+Delta_tau*Sigma_diag[i]) * np.dot(Rp_i,Rp_i.T),R))
R = jitchol(RTR).T
self.w[i] = self.w[i] + (Delta_v - Delta_tau*self.w[i])*dii/dtd1
self.gamma = self.gamma + (Delta_v - Delta_tau*self.mu[i])*np.dot(RTR,self.P[i,:].T)
self.RPT = np.dot(self.R,self.P.T)
self.Sigma_diag = self.Diag + np.sum(self.RPT.T*self.RPT.T,-1)
self.mu = self.w + np.dot(self.P,self.gamma)
self.gamma = self.gamma + (Delta_v - Delta_tau*mu[i])*np.dot(RTR,P[i,:].T)
RPT = np.dot(R,P.T)
Sigma_diag = Diag + np.sum(RPT.T*RPT.T,-1)
mu = self.w + np.dot(P,self.gamma)
self.iterations += 1
#Sigma recomputation with Cholesky decomposition
self.Diag = self.Diag0/(1.+ self.Diag0 * self.tau_tilde)
self.P = (self.Diag / self.Diag0)[:,None] * self.P0
self.RPT0 = np.dot(self.R0,self.P0.T)
L = jitchol(np.eye(self.M) + np.dot(self.RPT0,(1./self.Diag0 - self.Diag/(self.Diag0**2))[:,None]*self.RPT0.T))
self.R,info = linalg.flapack.dtrtrs(L,self.R0,lower=1)
self.RPT = np.dot(self.R,self.P.T)
self.Sigma_diag = self.Diag + np.sum(self.RPT.T*self.RPT.T,-1)
self.w = self.Diag * self.v_tilde
self.gamma = np.dot(self.R.T, np.dot(self.RPT,self.v_tilde))
self.mu = self.w + np.dot(self.P,self.gamma)
Diag = Diag0/(1.+ Diag0 * self.tau_tilde)
P = (Diag / Diag0)[:,None] * P0
RPT0 = np.dot(R0,P0.T)
L = jitchol(np.eye(self.M) + np.dot(RPT0,(1./Diag0 - Diag/(Diag0**2))[:,None]*RPT0.T))
R,info = linalg.flapack.dtrtrs(L,R0,lower=1)
RPT = np.dot(R,P.T)
Sigma_diag = Diag + np.sum(RPT.T*RPT.T,-1)
self.w = Diag * self.v_tilde
self.gamma = np.dot(R.T, np.dot(RPT,self.v_tilde))
mu = self.w + np.dot(P,self.gamma)
epsilon_np1 = sum((self.tau_tilde-self.np1[-1])**2)/self.N
epsilon_np2 = sum((self.v_tilde-self.np2[-1])**2)/self.N
self.np1.append(self.tau_tilde.copy())
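
For reference, the full (non-sparse) site updates above can be written as a self-contained NumPy sketch. The probit moment-matching formulas below are the standard ones (Rasmussen and Williams, Sec. 3.6) and stand in for likelihood_function.moments_match(); variable names follow the class attributes, and eta/delta are the power-EP and damping factors used above (both 1 by default).

import numpy as np
from scipy.stats import norm

def ep_sweep(K, y, tau_tilde, v_tilde, Sigma, mu, eta=1., delta=1.):
    """One random-order EP sweep for GP classification with a probit likelihood.
    Initialise with tau_tilde = v_tilde = 0, Sigma = K.copy(), mu = 0 and y in {-1, +1};
    the site and posterior arrays are updated in place and also returned."""
    N = y.size
    for i in np.random.permutation(N):
        # Cavity distribution parameters
        tau_ = 1. / Sigma[i, i] - eta * tau_tilde[i]
        v_ = mu[i] / Sigma[i, i] - eta * v_tilde[i]
        # Marginal moments of cavity x probit (moment matching)
        mu_cav, s2_cav = v_ / tau_, 1. / tau_
        z = y[i] * mu_cav / np.sqrt(1. + s2_cav)
        ratio = norm.pdf(z) / norm.cdf(z)
        mu_hat = mu_cav + y[i] * s2_cav * ratio / np.sqrt(1. + s2_cav)
        s2_hat = s2_cav - s2_cav ** 2 * ratio * (z + ratio) / (1. + s2_cav)
        # Site parameter updates
        Delta_tau = delta / eta * (1. / s2_hat - 1. / Sigma[i, i])
        Delta_v = delta / eta * (mu_hat / s2_hat - mu[i] / Sigma[i, i])
        tau_tilde[i] += Delta_tau
        v_tilde[i] += Delta_v
        # Rank-one posterior update, as in the loop above
        si = Sigma[:, i:i + 1].copy()
        Sigma -= Delta_tau / (1. + Delta_tau * Sigma[i, i]) * np.dot(si, si.T)
        mu = np.dot(Sigma, v_tilde)
    return tau_tilde, v_tilde, Sigma, mu

Repeating the sweep and monitoring the change in tau_tilde and v_tilde reproduces the epsilon_np1 / epsilon_np2 convergence check above; the class additionally recomputes Sigma and mu from a Cholesky factorisation after each sweep for numerical stability.
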

View file

@@ -7,7 +7,6 @@ from scipy import stats
import scipy as sp
import pylab as pb
from ..util.plot import gpplot
#from . import EP
class likelihood_function:
"""

View file

@@ -7,7 +7,7 @@ import pylab as pb
from .. import kern
from ..core import model
from ..util.linalg import pdinv,mdot
from ..util.plot import gpplot, Tango
from ..util.plot import gpplot,x_frame, Tango
from ..likelihoods import EP
class GP(model):
@@ -175,37 +175,7 @@ class GP(model):
return mean, _5pc, _95pc
def _x_frame(self,plot_limits=None,which_data='all',which_functions='all',resolution=None):
"""
Internal helper function for making plots, return a set of new input values to plot as well as lower and upper limits
"""
if which_functions=='all':
which_functions = [True]*self.kern.Nparts
if which_data=='all':
which_data = slice(None)
X = self.X[which_data,:]
Y = self.likelihood.Y[which_data,:]
if plot_limits is None:
xmin,xmax = X.min(0),X.max(0)
xmin, xmax = xmin-0.2*(xmax-xmin), xmax+0.2*(xmax-xmin)
elif len(plot_limits)==2:
xmin, xmax = plot_limits
else:
raise ValueError, "Bad limits for plotting"
if self.X.shape[1]==1:
Xnew = np.linspace(xmin,xmax,resolution or 200)[:,None]
elif self.X.shape[1]==2:
resolution = resolution or 50
xx,yy = np.mgrid[xmin[0]:xmax[0]:1j*resolution,xmin[1]:xmax[1]:1j*resolution]
Xnew = np.vstack((xx.flatten(),yy.flatten())).T
else:
raise NotImplementedError, "Cannot plot GPs with more than two input dimensions"
return Xnew, xmin, xmax
def plot(self,samples=0,plot_limits=None,which_data='all',which_functions='all',resolution=None,full_cov=False):
def plot_GP(self,samples=0,plot_limits=None,which_data='all',which_functions='all',resolution=None,full_cov=False):
"""
Plot the GP's view of the world, where the data is normalised and the likelihood is Gaussian
@@ -223,21 +193,29 @@ class GP(model):
- In higher dimensions, we've not implemented this yet !TODO!
Can plot only part of the data and part of the posterior functions using which_data and which_functions
"""
"""
Plot the data's view of the world, with non-normalised values and GP predictions passed through the likelihood
"""
Xnew, xmin, xmax = self._x_frame()
m,v = self._raw_predict(Xnew)
if isinstance(self.likelihood,EP):
pb.subplot(211)
if which_functions=='all':
which_functions = [True]*self.kern.Nparts
if which_data=='all':
which_data = slice(None)
Xnew, xmin, xmax = x_frame(self.X, plot_limits=plot_limits)
m,v = self._raw_predict(Xnew, slices=which_functions)
gpplot(Xnew,m,m-np.sqrt(v),m+np.sqrt(v))
pb.plot(self.X,self.likelihood.Y,'kx',mew=1.5)
pb.plot(self.X[which_data],self.likelihood.Y[which_data],'kx',mew=1.5)
pb.xlim(xmin,xmax)
if isinstance(self.likelihood,EP):
pb.subplot(212)
phi_m,phi_l,phi_u = self.likelihood.predictive_values(m,v)
gpplot(Xnew,phi_m,phi_l,phi_u)
pb.plot(self.X,self.likelihood.data,'kx',mew=1.5)
pb.xlim(xmin,xmax)
def plot_output(self,samples=0,plot_limits=None,which_data='all',which_functions='all',resolution=None,full_cov=False):
if which_functions=='all':
which_functions = [True]*self.kern.Nparts
if which_data=='all':
which_data = slice(None)
Xnew, xmin, xmax = x_frame(self.X, plot_limits=plot_limits)
m, lower, upper = self.predict(Xnew, slices=which_functions)
gpplot(Xnew,m, lower, upper)
pb.plot(self.X[which_data],self.likelihood.data[which_data],'kx',mew=1.5)
ymin,ymax = self.likelihood.data.min()*1.2,self.likelihood.data.max()*1.2
pb.xlim(xmin,xmax)
pb.ylim(ymin,ymax)
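
The split of the old plot() into plot_GP and plot_output corresponds to the two prediction paths in the class. A rough sketch of the distinction, assuming a fitted classification model m such as the one built in the demo sketch earlier (the method names and the x_frame import are taken from the diff above):

from GPy.util.plot import x_frame

# m: a fitted GP classification model, e.g. the one returned by the demos above.
Xnew, xmin, xmax = x_frame(m.X)        # frame of test inputs spanning the data
mu, var = m._raw_predict(Xnew)         # latent GP; plot_GP shades mu +/- sqrt(var)
mean, lower, upper = m.predict(Xnew)   # pushed through the EP likelihood; drawn by plot_output
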

View file

@@ -154,17 +154,16 @@ class GradientTests(unittest.TestCase):
m.constrain_positive('(linear|bias|white)')
self.assertTrue(m.checkgrad())
def test_GP_EP(self):
return # Disabled TODO
def test_GP_EP_probit(self):
N = 20
X = np.hstack([np.random.rand(N/2)+1,np.random.rand(N/2)-1])[:,None]
k = GPy.kern.rbf(1) + GPy.kern.white(1)
Y = np.hstack([np.ones(N/2),-np.ones(N/2)])[:,None]
likelihood = GPy.inference.likelihoods.probit(Y)
m = GPy.models.GP_EP(X,likelihood,k)
m.constrain_positive('(var|len)')
m.approximate_likelihood()
self.assertTrue(m.checkgrad())
X = np.hstack([np.random.normal(5,2,N/2),np.random.normal(10,2,N/2)])[:,None]
Y = np.hstack([np.ones(N/2),np.repeat(-1,N/2)])[:,None]
kernel = GPy.kern.rbf(1)
distribution = GPy.likelihoods.likelihood_functions.probit()
likelihood = GPy.likelihoods.EP(Y,distribution)
m = GPy.models.GP(X,kernel,likelihood=likelihood)
m.ensure_default_constraints()
self.assertTrue(m.EPEM)
@unittest.skip("FITC will be broken for a while")
def test_generalized_FITC(self):

View file

@@ -70,4 +70,24 @@ def align_subplots(N,M,xlim=None, ylim=None):
else:
removeUpperTicks()
def x_frame(X,plot_limits=None,resolution=None):
"""
Internal helper function for making plots, returns a set of input values to plot as well as lower and upper limits
"""
if plot_limits is None:
xmin,xmax = X.min(0),X.max(0)
xmin, xmax = xmin-0.2*(xmax-xmin), xmax+0.2*(xmax-xmin)
elif len(plot_limits)==2:
xmin, xmax = plot_limits
else:
raise ValueError, "Bad limits for plotting"
if X.shape[1]==1:
Xnew = np.linspace(xmin,xmax,resolution or 200)[:,None]
elif X.shape[1]==2:
resolution = resolution or 50
xx,yy = np.mgrid[xmin[0]:xmax[0]:1j*resolution,xmin[1]:xmax[1]:1j*resolution]
Xnew = np.vstack((xx.flatten(),yy.flatten())).T
else:
raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
return Xnew, xmin, xmax
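
A short usage sketch of the new helper; it assumes x_frame is exposed as GPy.util.plot.x_frame, as the import added to the GP model above indicates.

import numpy as np
from GPy.util.plot import x_frame

X = np.random.randn(30, 1)
Xnew, xmin, xmax = x_frame(X)                        # 200 test points spanning the data range +/- 20%
Xnew, xmin, xmax = x_frame(X, plot_limits=(-3, 3))   # or with explicit limits

X2d = np.random.randn(30, 2)
Xgrid, xmin, xmax = x_frame(X2d, resolution=50)      # 50x50 grid flattened to a (2500, 2) array
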