EPEM is running.

This commit is contained in:
Ricardo Andrade 2013-02-01 15:14:11 +00:00
parent f941d629e6
commit 0a8686d7c0
5 changed files with 117 additions and 112 deletions

View file

@ -1,7 +1,6 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt). # Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
""" """
Simple Gaussian Processes classification 1D Simple Gaussian Processes classification 1D
probit likelihood probit likelihood
@ -19,25 +18,36 @@ X1 = np.random.normal(5,2,N/2)
X2 = np.random.normal(10,2,N/2) X2 = np.random.normal(10,2,N/2)
X = np.hstack([X1,X2])[:,None] X = np.hstack([X1,X2])[:,None]
# Outputs # Output
Y = np.hstack([np.ones(N/2),np.repeat(-1,N/2)])[:,None] Y = np.hstack([np.ones(N/2),np.repeat(-1,N/2)])[:,None]
# Kernel object # Kernel object
kernel = GPy.kern.rbf(1) kernel = GPy.kern.rbf(1)
# Define likelihood # Likelihood object
distribution = GPy.likelihoods.likelihood_functions.probit() distribution = GPy.likelihoods.likelihood_functions.probit()
likelihood_object = GPy.likelihoods.EP(Y,distribution) likelihood = GPy.likelihoods.EP(Y,distribution)
# Model definition # Model definition
m = GPy.models.GP(X,kernel,likelihood=likelihood_object) m = GPy.models.GP(X,kernel,likelihood=likelihood)
m.ensure_default_constraints()
m.update_likelihood_approximation()
#m.checkgrad(verbose=1)
m.optimize()
print "Round 2"
#rm.update_likelihood_approximation()
#m.EPEM() # Model constraints
m.plot() m.ensure_default_constraints()
#print(m)
# Optimize model
"""
EPEM runs a loop that consists of two steps:
1) EP likelihood approximation:
m.update_likelihood_approximation()
2) Parameters optimization:
m.optimize()
"""
m.EPEM()
# Plot
pb.subplot(211)
m.plot_GP()
pb.subplot(212)
m.plot_output()
print(m)

View file

@ -65,8 +65,8 @@ class EP(likelihood):
self.tau_tilde = np.zeros(self.N) self.tau_tilde = np.zeros(self.N)
self.v_tilde = np.zeros(self.N) self.v_tilde = np.zeros(self.N)
#Initial values - Posterior distribution parameters: q(f|X,Y) = N(f|mu,Sigma) #Initial values - Posterior distribution parameters: q(f|X,Y) = N(f|mu,Sigma)
self.mu = np.zeros(self.N) mu = np.zeros(self.N)
self.Sigma = K.copy() Sigma = K.copy()
""" """
Initial values - Cavity distribution parameters: Initial values - Cavity distribution parameters:
@ -94,29 +94,27 @@ class EP(likelihood):
update_order = np.random.permutation(self.N) update_order = np.random.permutation(self.N)
for i in update_order: for i in update_order:
#Cavity distribution parameters #Cavity distribution parameters
self.tau_[i] = 1./self.Sigma[i,i] - self.eta*self.tau_tilde[i] self.tau_[i] = 1./Sigma[i,i] - self.eta*self.tau_tilde[i]
self.v_[i] = self.mu[i]/self.Sigma[i,i] - self.eta*self.v_tilde[i] self.v_[i] = mu[i]/Sigma[i,i] - self.eta*self.v_tilde[i]
print 1./self.Sigma[i,i],self.tau_tilde[i]
#Marginal moments #Marginal moments
self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.likelihood_function.moments_match(self.data[i],self.tau_[i],self.v_[i]) self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.likelihood_function.moments_match(self.data[i],self.tau_[i],self.v_[i])
#Site parameters update #Site parameters update
Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./self.Sigma[i,i]) Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma[i,i])
Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - self.mu[i]/self.Sigma[i,i]) Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma[i,i])
self.tau_tilde[i] = self.tau_tilde[i] + Delta_tau self.tau_tilde[i] = self.tau_tilde[i] + Delta_tau
self.v_tilde[i] = self.v_tilde[i] + Delta_v self.v_tilde[i] = self.v_tilde[i] + Delta_v
#Posterior distribution parameters update #Posterior distribution parameters update
si=self.Sigma[:,i].reshape(self.N,1) si=Sigma[:,i].reshape(self.N,1)
self.Sigma = self.Sigma - Delta_tau/(1.+ Delta_tau*self.Sigma[i,i])*np.dot(si,si.T) Sigma = Sigma - Delta_tau/(1.+ Delta_tau*Sigma[i,i])*np.dot(si,si.T)
self.mu = np.dot(self.Sigma,self.v_tilde) mu = np.dot(Sigma,self.v_tilde)
self.iterations += 1 self.iterations += 1
print self.tau_tilde[i]
#Sigma recomptutation with Cholesky decompositon #Sigma recomptutation with Cholesky decompositon
Sroot_tilde_K = np.sqrt(self.tau_tilde)[:,None]*K Sroot_tilde_K = np.sqrt(self.tau_tilde)[:,None]*K
B = np.eye(self.N) + np.sqrt(self.tau_tilde)[None,:]*Sroot_tilde_K B = np.eye(self.N) + np.sqrt(self.tau_tilde)[None,:]*Sroot_tilde_K
L = jitchol(B) L = jitchol(B)
V,info = linalg.flapack.dtrtrs(L,Sroot_tilde_K,lower=1) V,info = linalg.flapack.dtrtrs(L,Sroot_tilde_K,lower=1)
self.Sigma = K - np.dot(V.T,V) Sigma = K - np.dot(V.T,V)
self.mu = np.dot(self.Sigma,self.v_tilde) mu = np.dot(Sigma,self.v_tilde)
epsilon_np1 = sum((self.tau_tilde-self.np1[-1])**2)/self.N epsilon_np1 = sum((self.tau_tilde-self.np1[-1])**2)/self.N
epsilon_np2 = sum((self.v_tilde-self.np2[-1])**2)/self.N epsilon_np2 = sum((self.v_tilde-self.np2[-1])**2)/self.N
self.np1.append(self.tau_tilde.copy()) self.np1.append(self.tau_tilde.copy())
@ -139,7 +137,7 @@ class EP(likelihood):
""" """
Kmmi, Lm, Lmi, Kmm_logdet = pdinv(Kmm) Kmmi, Lm, Lmi, Kmm_logdet = pdinv(Kmm)
KmnKnm = np.dot(Kmn, Kmn.T) KmnKnm = np.dot(Kmn, Kmn.T)
KmmiKmn = np.dot(Kmmi,self.Kmn) KmmiKmn = np.dot(Kmmi,Kmn)
Qnn_diag = np.sum(Kmn*KmmiKmn,-2) Qnn_diag = np.sum(Kmn*KmmiKmn,-2)
LLT0 = Kmm.copy() LLT0 = Kmm.copy()
@ -221,13 +219,13 @@ class EP(likelihood):
q(f|X) = int_{df}{N(f|KfuKuu_invu,diag(Kff-Qff)*N(u|0,Kuu)} = N(f|0,Sigma0) q(f|X) = int_{df}{N(f|KfuKuu_invu,diag(Kff-Qff)*N(u|0,Kuu)} = N(f|0,Sigma0)
Sigma0 = diag(Knn-Qnn) + Qnn, Qnn = Knm*Kmmi*Kmn Sigma0 = diag(Knn-Qnn) + Qnn, Qnn = Knm*Kmmi*Kmn
""" """
self.Kmmi, self.Lm, self.Lmi, self.Kmm_logdet = pdinv(self.Kmm) Kmmi, self.Lm, self.Lmi, Kmm_logdet = pdinv(Kmm)
self.P0 = self.Kmn.T P0 = Kmn.T
self.KmnKnm = np.dot(self.P0.T, self.P0) KmnKnm = np.dot(P0.T, P0)
self.KmmiKmn = np.dot(self.Kmmi,self.P0.T) KmmiKmn = np.dot(Kmmi,P0.T)
self.Qnn_diag = np.sum(self.P0.T*self.KmmiKmn,-2) Qnn_diag = np.sum(P0.T*KmmiKmn,-2)
self.Diag0 = self.Knn_diag - self.Qnn_diag Diag0 = Knn_diag - Qnn_diag
self.R0 = jitchol(self.Kmmi).T R0 = jitchol(Kmmi).T
""" """
Posterior approximation: q(f|y) = N(f| mu, Sigma) Posterior approximation: q(f|y) = N(f| mu, Sigma)
@ -236,11 +234,11 @@ class EP(likelihood):
""" """
self.w = np.zeros(self.N) self.w = np.zeros(self.N)
self.gamma = np.zeros(self.M) self.gamma = np.zeros(self.M)
self.mu = np.zeros(self.N) mu = np.zeros(self.N)
self.P = self.P0.copy() P = P0.copy()
self.R = self.R0.copy() R = R0.copy()
self.Diag = self.Diag0.copy() Diag = Diag0.copy()
self.Sigma_diag = self.Knn_diag Sigma_diag = Knn_diag
""" """
Initial values - Cavity distribution parameters: Initial values - Cavity distribution parameters:
@ -268,41 +266,41 @@ class EP(likelihood):
update_order = np.random.permutation(self.N) update_order = np.random.permutation(self.N)
for i in update_order: for i in update_order:
#Cavity distribution parameters #Cavity distribution parameters
self.tau_[i] = 1./self.Sigma_diag[i] - self.eta*self.tau_tilde[i] self.tau_[i] = 1./Sigma_diag[i] - self.eta*self.tau_tilde[i]
self.v_[i] = self.mu[i]/self.Sigma_diag[i] - self.eta*self.v_tilde[i] self.v_[i] = mu[i]/Sigma_diag[i] - self.eta*self.v_tilde[i]
#Marginal moments #Marginal moments
self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.likelihood_function.moments_match(data[i],self.tau_[i],self.v_[i]) self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.likelihood_function.moments_match(data[i],self.tau_[i],self.v_[i])
#Site parameters update #Site parameters update
Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./self.Sigma_diag[i]) Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i])
Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - self.mu[i]/self.Sigma_diag[i]) Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i])
self.tau_tilde[i] = self.tau_tilde[i] + Delta_tau self.tau_tilde[i] = self.tau_tilde[i] + Delta_tau
self.v_tilde[i] = self.v_tilde[i] + Delta_v self.v_tilde[i] = self.v_tilde[i] + Delta_v
#Posterior distribution parameters update #Posterior distribution parameters update
dtd1 = Delta_tau*self.Diag[i] + 1. dtd1 = Delta_tau*Diag[i] + 1.
dii = self.Diag[i] dii = Diag[i]
self.Diag[i] = dii - (Delta_tau * dii**2.)/dtd1 Diag[i] = dii - (Delta_tau * dii**2.)/dtd1
pi_ = self.P[i,:].reshape(1,self.M) pi_ = P[i,:].reshape(1,self.M)
self.P[i,:] = pi_ - (Delta_tau*dii)/dtd1 * pi_ P[i,:] = pi_ - (Delta_tau*dii)/dtd1 * pi_
Rp_i = np.dot(self.R,pi_.T) Rp_i = np.dot(R,pi_.T)
RTR = np.dot(self.R.T,np.dot(np.eye(self.M) - Delta_tau/(1.+Delta_tau*self.Sigma_diag[i]) * np.dot(Rp_i,Rp_i.T),self.R)) RTR = np.dot(R.T,np.dot(np.eye(self.M) - Delta_tau/(1.+Delta_tau*Sigma_diag[i]) * np.dot(Rp_i,Rp_i.T),R))
self.R = jitchol(RTR).T R = jitchol(RTR).T
self.w[i] = self.w[i] + (Delta_v - Delta_tau*self.w[i])*dii/dtd1 self.w[i] = self.w[i] + (Delta_v - Delta_tau*self.w[i])*dii/dtd1
self.gamma = self.gamma + (Delta_v - Delta_tau*self.mu[i])*np.dot(RTR,self.P[i,:].T) self.gamma = self.gamma + (Delta_v - Delta_tau*mu[i])*np.dot(RTR,P[i,:].T)
self.RPT = np.dot(self.R,self.P.T) RPT = np.dot(R,P.T)
self.Sigma_diag = self.Diag + np.sum(self.RPT.T*self.RPT.T,-1) Sigma_diag = Diag + np.sum(RPT.T*RPT.T,-1)
self.mu = self.w + np.dot(self.P,self.gamma) mu = self.w + np.dot(P,self.gamma)
self.iterations += 1 self.iterations += 1
#Sigma recomptutation with Cholesky decompositon #Sigma recomptutation with Cholesky decompositon
self.Diag = self.Diag0/(1.+ self.Diag0 * self.tau_tilde) Diag = Diag0/(1.+ Diag0 * self.tau_tilde)
self.P = (self.Diag / self.Diag0)[:,None] * self.P0 P = (Diag / Diag0)[:,None] * P0
self.RPT0 = np.dot(self.R0,self.P0.T) RPT0 = np.dot(R0,P0.T)
L = jitchol(np.eye(self.M) + np.dot(self.RPT0,(1./self.Diag0 - self.Diag/(self.Diag0**2))[:,None]*self.RPT0.T)) L = jitchol(np.eye(self.M) + np.dot(RPT0,(1./Diag0 - Diag/(Diag0**2))[:,None]*RPT0.T))
self.R,info = linalg.flapack.dtrtrs(L,self.R0,lower=1) R,info = linalg.flapack.dtrtrs(L,R0,lower=1)
self.RPT = np.dot(self.R,self.P.T) RPT = np.dot(R,P.T)
self.Sigma_diag = self.Diag + np.sum(self.RPT.T*self.RPT.T,-1) Sigma_diag = Diag + np.sum(RPT.T*RPT.T,-1)
self.w = self.Diag * self.v_tilde self.w = Diag * self.v_tilde
self.gamma = np.dot(self.R.T, np.dot(self.RPT,self.v_tilde)) self.gamma = np.dot(R.T, np.dot(RPT,self.v_tilde))
self.mu = self.w + np.dot(self.P,self.gamma) mu = self.w + np.dot(P,self.gamma)
epsilon_np1 = sum((self.tau_tilde-self.np1[-1])**2)/self.N epsilon_np1 = sum((self.tau_tilde-self.np1[-1])**2)/self.N
epsilon_np2 = sum((self.v_tilde-self.np2[-1])**2)/self.N epsilon_np2 = sum((self.v_tilde-self.np2[-1])**2)/self.N
self.np1.append(self.tau_tilde.copy()) self.np1.append(self.tau_tilde.copy())

View file

@ -7,7 +7,6 @@ from scipy import stats
import scipy as sp import scipy as sp
import pylab as pb import pylab as pb
from ..util.plot import gpplot from ..util.plot import gpplot
#from . import EP
class likelihood_function: class likelihood_function:
""" """

View file

@ -7,7 +7,7 @@ import pylab as pb
from .. import kern from .. import kern
from ..core import model from ..core import model
from ..util.linalg import pdinv,mdot from ..util.linalg import pdinv,mdot
from ..util.plot import gpplot, Tango from ..util.plot import gpplot,x_frame, Tango
from ..likelihoods import EP from ..likelihoods import EP
class GP(model): class GP(model):
@ -175,37 +175,7 @@ class GP(model):
return mean, _5pc, _95pc return mean, _5pc, _95pc
def _x_frame(self,plot_limits=None,which_data='all',which_functions='all',resolution=None): def plot_GP(self,samples=0,plot_limits=None,which_data='all',which_functions='all',resolution=None,full_cov=False):
"""
Internal helper function for making plots, return a set of new input values to plot as well as lower and upper limits
"""
if which_functions=='all':
which_functions = [True]*self.kern.Nparts
if which_data=='all':
which_data = slice(None)
X = self.X[which_data,:]
Y = self.likelihood.Y[which_data,:]
if plot_limits is None:
xmin,xmax = X.min(0),X.max(0)
xmin, xmax = xmin-0.2*(xmax-xmin), xmax+0.2*(xmax-xmin)
elif len(plot_limits)==2:
xmin, xmax = plot_limits
else:
raise ValueError, "Bad limits for plotting"
if self.X.shape[1]==1:
Xnew = np.linspace(xmin,xmax,resolution or 200)[:,None]
elif self.X.shape[1]==2:
resolution = resolution or 50
xx,yy = np.mgrid[xmin[0]:xmax[0]:1j*resolution,xmin[1]:xmax[1]:1j*resolution]
Xnew = np.vstack((xx.flatten(),yy.flatten())).T
else:
raise NotImplementedError, "Cannot plot GPs with more than two input dimensions"
return Xnew, xmin, xmax
def plot(self,samples=0,plot_limits=None,which_data='all',which_functions='all',resolution=None,full_cov=False):
""" """
Plot the GP's view of the world, where the data is normalised and the likelihood is Gaussian Plot the GP's view of the world, where the data is normalised and the likelihood is Gaussian
@ -223,21 +193,29 @@ class GP(model):
- In higher dimensions, we've no implemented this yet !TODO! - In higher dimensions, we've no implemented this yet !TODO!
Can plot only part of the data and part of the posterior functions using which_data and which_functions Can plot only part of the data and part of the posterior functions using which_data and which_functions
"""
"""
Plot the data's view of the world, with non-normalised values and GP predictions passed through the likelihood Plot the data's view of the world, with non-normalised values and GP predictions passed through the likelihood
""" """
Xnew, xmin, xmax = self._x_frame() if which_functions=='all':
m,v = self._raw_predict(Xnew) which_functions = [True]*self.kern.Nparts
if isinstance(self.likelihood,EP): if which_data=='all':
pb.subplot(211) which_data = slice(None)
Xnew, xmin, xmax = x_frame(self.X, plot_limits=plot_limits)
m,v = self._raw_predict(Xnew, slices=which_functions)
gpplot(Xnew,m,m-np.sqrt(v),m+np.sqrt(v)) gpplot(Xnew,m,m-np.sqrt(v),m+np.sqrt(v))
pb.plot(self.X,self.likelihood.Y,'kx',mew=1.5) pb.plot(self.X[which_data],self.likelihood.Y[which_data],'kx',mew=1.5)
pb.xlim(xmin,xmax) pb.xlim(xmin,xmax)
if isinstance(self.likelihood,EP): def plot_output(self,samples=0,plot_limits=None,which_data='all',which_functions='all',resolution=None,full_cov=False):
pb.subplot(212) if which_functions=='all':
phi_m,phi_l,phi_u = self.likelihood.predictive_values(m,v) which_functions = [True]*self.kern.Nparts
gpplot(Xnew,phi_m,phi_l,phi_u) if which_data=='all':
pb.plot(self.X,self.likelihood.data,'kx',mew=1.5) which_data = slice(None)
pb.xlim(xmin,xmax) Xnew, xmin, xmax = x_frame(self.X, plot_limits=plot_limits)
m, lower, upper = self.predict(Xnew, slices=which_functions)
gpplot(Xnew,m, lower, upper)
pb.plot(self.X[which_data],self.likelihood.data[which_data],'kx',mew=1.5)
ymin,ymax = self.likelihood.data.min()*1.2,self.likelihood.data.max()*1.2
pb.xlim(xmin,xmax)
pb.ylim(ymin,ymax)

View file

@ -70,4 +70,24 @@ def align_subplots(N,M,xlim=None, ylim=None):
else: else:
removeUpperTicks() removeUpperTicks()
def x_frame(X,plot_limits=None,resolution=None):
"""
Internal helper function for making plots, returns a set of input values to plot as well as lower and upper limits
"""
if plot_limits is None:
xmin,xmax = X.min(0),X.max(0)
xmin, xmax = xmin-0.2*(xmax-xmin), xmax+0.2*(xmax-xmin)
elif len(plot_limits)==2:
xmin, xmax = plot_limits
else:
raise ValueError, "Bad limits for plotting"
if X.shape[1]==1:
Xnew = np.linspace(xmin,xmax,resolution or 200)[:,None]
elif X.shape[1]==2:
resolution = resolution or 50
xx,yy = np.mgrid[xmin[0]:xmax[0]:1j*resolution,xmin[1]:xmax[1]:1j*resolution]
Xnew = np.vstack((xx.flatten(),yy.flatten())).T
else:
raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
return Xnew, xmin, xmax