No more GP_EP stuff

This commit is contained in:
Ricardo Andrade 2013-01-25 18:24:10 +00:00
parent 6a2e0a1fe5
commit 738ca78dac
5 changed files with 2 additions and 682 deletions

View file

@@ -1,240 +0,0 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
import random
from scipy import stats, linalg
from .likelihoods import likelihood
from ..core import model
from ..util.linalg import pdinv,mdot,jitchol
from ..util.plot import gpplot
from .. import kern
class EP_base:
"""
Expectation Propagation.
This is just the base class for expectation propagation. We'll extend it for full and sparse EP.
"""
def __init__(self,likelihood,epsilon=1e-3,powerep=[1.,1.]):
self.likelihood = likelihood
self.epsilon = epsilon
self.eta, self.delta = powerep
self.jitter = 1e-12
#Initial values - Likelihood approximation parameters:
#p(y|f) = t(f|tau_tilde,v_tilde)
self.restart_EP()
def restart_EP(self):
"""
Set the EP approximation to initial state
"""
self.tau_tilde = np.zeros(self.N)
self.v_tilde = np.zeros(self.N)
self.mu = np.zeros(self.N)
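#Note on the parameterisation (Rasmussen & Williams 2006, ch. 3): each
#site t(f|tau_tilde,v_tilde) is an unnormalised Gaussian held in natural
#form, tau_tilde = 1/sigma2_tilde and v_tilde = mu_tilde/sigma2_tilde,
#so the zero initialisation above corresponds to flat sites and a
#posterior that starts at the prior: mu = 0, Sigma = K.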
class Full(EP_base):
"""
:param K: prior covariance matrix
:type K: np.ndarray (N x N)
:param likelihood: Output's likelihood (e.g. probit)
:type likelihood: GPy.inference.likelihood instance
:param epsilon: convergence criterion: maximum squared difference allowed between mean updates before iterations stop (float)
:param powerep: Power-EP parameters (eta,delta) - 2x1 numpy array (floats)
"""
def __init__(self,K,likelihood,*args,**kwargs):
assert K.shape[0] == K.shape[1]
self.K = K
self.N = self.K.shape[0]
EP_base.__init__(self,likelihood,*args,**kwargs)
def fit_EP(self,messages=False):
"""
The expectation-propagation algorithm.
For nomenclature see Rasmussen & Williams 2006 (pp. 52-60)
"""
#Prior distribution parameters: p(f|X) = N(f|0,K)
#self.K = self.kernel.K(self.X,self.X)
#Initial values - Posterior distribution parameters: q(f|X,Y) = N(f|mu,Sigma)
self.mu=np.zeros(self.N)
self.Sigma=self.K.copy()
"""
Initial values - Cavity distribution parameters:
q_(f|mu_,sigma2_) = Product{q_i(f|mu_i,sigma2_i)}
sigma_ = 1./tau_
mu_ = v_/tau_
"""
self.tau_ = np.empty(self.N,dtype=np.float64)
self.v_ = np.empty(self.N,dtype=np.float64)
#Initial values - Marginal moments
z = np.empty(self.N,dtype=np.float64)
self.Z_hat = np.empty(self.N,dtype=np.float64)
phi = np.empty(self.N,dtype=np.float64)
mu_hat = np.empty(self.N,dtype=np.float64)
sigma2_hat = np.empty(self.N,dtype=np.float64)
#Approximation
epsilon_np1 = self.epsilon + 1.
epsilon_np2 = self.epsilon + 1.
self.iterations = 0
self.np1 = [self.tau_tilde.copy()]
self.np2 = [self.v_tilde.copy()]
while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon:
update_order = np.random.permutation(self.N)
for i in update_order:
#Cavity distribution parameters
self.tau_[i] = 1./self.Sigma[i,i] - self.eta*self.tau_tilde[i]
self.v_[i] = self.mu[i]/self.Sigma[i,i] - self.eta*self.v_tilde[i]
#Marginal moments
self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.likelihood.moments_match(i,self.tau_[i],self.v_[i])
#Site parameters update
Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./self.Sigma[i,i])
Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - self.mu[i]/self.Sigma[i,i])
self.tau_tilde[i] = self.tau_tilde[i] + Delta_tau
self.v_tilde[i] = self.v_tilde[i] + Delta_v
#Posterior distribution parameters update
si=self.Sigma[:,i].reshape(self.N,1)
self.Sigma = self.Sigma - Delta_tau/(1.+ Delta_tau*self.Sigma[i,i])*np.dot(si,si.T)
self.mu = np.dot(self.Sigma,self.v_tilde)
self.iterations += 1
#Sigma recomputation with Cholesky decomposition
Sroot_tilde_K = np.sqrt(self.tau_tilde)[:,None]*(self.K)
B = np.eye(self.N) + np.sqrt(self.tau_tilde)[None,:]*Sroot_tilde_K
L = jitchol(B)
V,info = linalg.flapack.dtrtrs(L,Sroot_tilde_K,lower=1)
self.Sigma = self.K - np.dot(V.T,V)
self.mu = np.dot(self.Sigma,self.v_tilde)
epsilon_np1 = np.mean((self.tau_tilde-self.np1[-1])**2)
epsilon_np2 = np.mean((self.v_tilde-self.np2[-1])**2)
self.np1.append(self.tau_tilde.copy())
self.np2.append(self.v_tilde.copy())
if messages:
print "EP iteration %i, epsilon %d"%(self.iterations,epsilon_np1)
class FITC(EP_base):
"""
:param likelihood: Output's likelihood (e.g. probit)
:type likelihood: GPy.inference.likelihood instance
:param Knn_diag: the diagonal elements of Knn (1D vector of length N)
:param Kmn: the 'cross' covariance between inducing inputs and data
:param Kmm: the covariance matrix of the inducing inputs
:param epsilon: convergence criterion: maximum squared difference allowed between mean updates before iterations stop (float)
:param powerep: Power-EP parameters (eta,delta) - 2x1 numpy array (floats)
"""
def __init__(self,likelihood,Knn_diag,Kmn,Kmm,*args,**kwargs):
self.Knn_diag = Knn_diag
self.Kmn = Kmn
self.Kmm = Kmm
self.M = self.Kmn.shape[0]
self.N = self.Kmn.shape[1]
assert self.M <= self.N, 'The number of inducing inputs cannot exceed the number of observations'
assert len(Knn_diag) == self.N, 'Knn_diag must have length N'
EP_base.__init__(self,likelihood,*args,**kwargs)
def fit_EP(self):
"""
The expectation-propagation algorithm with sparse pseudo-input.
For nomenclature see Naish-Guzman and Holden, 2008.
"""
"""
Prior approximation parameters:
q(f|X) = int N(f|Kfu*Kuu_inv*u, diag(Kff-Qff)) N(u|0,Kuu) du = N(f|0,Sigma0)
Sigma0 = diag(Knn-Qnn) + Qnn, Qnn = Knm*Kmmi*Kmn
"""
self.Kmmi, self.Kmm_hld = pdinv(self.Kmm)
self.P0 = self.Kmn.T
self.KmnKnm = np.dot(self.P0.T, self.P0)
self.KmmiKmn = np.dot(self.Kmmi,self.P0.T)
self.Qnn_diag = np.sum(self.P0.T*self.KmmiKmn,-2)
self.Diag0 = self.Knn_diag - self.Qnn_diag
self.R0 = jitchol(self.Kmmi).T
"""
Posterior approximation: q(f|y) = N(f| mu, Sigma)
Sigma = Diag + P*R.T*R*P.T
mu = w + P*gamma
"""
self.w = np.zeros(self.N)
self.gamma = np.zeros(self.M)
self.mu = np.zeros(self.N)
self.P = self.P0.copy()
self.R = self.R0.copy()
self.Diag = self.Diag0.copy()
self.Sigma_diag = self.Knn_diag
"""
Initial values - Cavity distribution parameters:
q_(g|mu_,sigma2_) = Product{q_i(g|mu_i,sigma2_i)}
sigma_ = 1./tau_
mu_ = v_/tau_
"""
self.tau_ = np.empty(self.N,dtype=np.float64)
self.v_ = np.empty(self.N,dtype=np.float64)
#Initial values - Marginal moments
z = np.empty(self.N,dtype=np.float64)
self.Z_hat = np.empty(self.N,dtype=np.float64)
phi = np.empty(self.N,dtype=np.float64)
mu_hat = np.empty(self.N,dtype=np.float64)
sigma2_hat = np.empty(self.N,dtype=np.float64)
#Approximation
epsilon_np1 = self.epsilon + 1.
epsilon_np2 = self.epsilon + 1.
self.iterations = 0
self.np1 = [self.tau_tilde.copy()]
self.np2 = [self.v_tilde.copy()]
while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon:
update_order = np.arange(self.N)
random.shuffle(update_order)
for i in update_order:
#Cavity distribution parameters
self.tau_[i] = 1./self.Sigma_diag[i] - self.eta*self.tau_tilde[i]
self.v_[i] = self.mu[i]/self.Sigma_diag[i] - self.eta*self.v_tilde[i]
#Marginal moments
self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.likelihood.moments_match(i,self.tau_[i],self.v_[i])
#Site parameters update
Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./self.Sigma_diag[i])
Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - self.mu[i]/self.Sigma_diag[i])
self.tau_tilde[i] = self.tau_tilde[i] + Delta_tau
self.v_tilde[i] = self.v_tilde[i] + Delta_v
#Posterior distribution parameters update
dtd1 = Delta_tau*self.Diag[i] + 1.
dii = self.Diag[i]
self.Diag[i] = dii - (Delta_tau * dii**2.)/dtd1
pi_ = self.P[i,:].reshape(1,self.M)
self.P[i,:] = pi_ - (Delta_tau*dii)/dtd1 * pi_
Rp_i = np.dot(self.R,pi_.T)
RTR = np.dot(self.R.T,np.dot(np.eye(self.M) - Delta_tau/(1.+Delta_tau*self.Sigma_diag[i]) * np.dot(Rp_i,Rp_i.T),self.R))
self.R = jitchol(RTR).T
self.w[i] = self.w[i] + (Delta_v - Delta_tau*self.w[i])*dii/dtd1
self.gamma = self.gamma + (Delta_v - Delta_tau*self.mu[i])*np.dot(RTR,self.P[i,:].T)
self.RPT = np.dot(self.R,self.P.T)
self.Sigma_diag = self.Diag + np.sum(self.RPT.T*self.RPT.T,-1)
self.mu = self.w + np.dot(self.P,self.gamma)
self.iterations += 1
#Sigma recomputation with Cholesky decomposition
self.Diag = self.Diag0/(1.+ self.Diag0 * self.tau_tilde)
self.P = (self.Diag / self.Diag0)[:,None] * self.P0
self.RPT0 = np.dot(self.R0,self.P0.T)
L = jitchol(np.eye(self.M) + np.dot(self.RPT0,(1./self.Diag0 - self.Diag/(self.Diag0**2))[:,None]*self.RPT0.T))
self.R,info = linalg.flapack.dtrtrs(L,self.R0,lower=1)
self.RPT = np.dot(self.R,self.P.T)
self.Sigma_diag = self.Diag + np.sum(self.RPT.T*self.RPT.T,-1)
self.w = self.Diag * self.v_tilde
self.gamma = np.dot(self.R.T, np.dot(self.RPT,self.v_tilde))
self.mu = self.w + np.dot(self.P,self.gamma)
epsilon_np1 = sum((self.tau_tilde-self.np1[-1])**2)/self.N
epsilon_np2 = sum((self.v_tilde-self.np2[-1])**2)/self.N
self.np1.append(self.tau_tilde.copy())
self.np2.append(self.v_tilde.copy())
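For reference, the rank-one posterior update used by both classes above is the Sherman-Morrison identity applied to Sigma = (K^-1 + diag(tau_tilde))^-1. A minimal standalone numerical check (all names are local to this sketch, not part of the module):

import numpy as np
N = 4
A = np.random.randn(N, N)
K = np.dot(A, A.T) + N*np.eye(N)        #a positive-definite "prior" covariance
tau = np.abs(np.random.randn(N))        #site precisions tau_tilde
Sigma = np.linalg.inv(np.linalg.inv(K) + np.diag(tau))
i, Delta_tau = 2, 0.7                   #perturb one site precision
si = Sigma[:, i:i+1]
Sigma_new = Sigma - Delta_tau/(1. + Delta_tau*Sigma[i, i])*np.dot(si, si.T)
tau[i] += Delta_tau
assert np.allclose(Sigma_new, np.linalg.inv(np.linalg.inv(K) + np.diag(tau)))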

View file

@@ -1,160 +0,0 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
import pylab as pb
from scipy import stats, linalg
from .. import kern
from ..inference.Expectation_Propagation import Full
from ..inference.likelihoods import likelihood,probit#,poisson,gaussian
from ..core import model
from ..util.linalg import pdinv,jitchol
from ..util.plot import gpplot
class GP_EP(model):
def __init__(self,X,likelihood,kernel=None,epsilon_ep=1e-3,epsilon_em=.1,powerep=[1.,1.]):
"""
Simple Gaussian Process with Non-Gaussian likelihood
Arguments
---------
:param X: input observations (NxD numpy.ndarray)
:param likelihood: a GPy likelihood (likelihood class)
:param kernel: a GPy kernel (kern class)
:param epsilon_ep: convergence criterion for the Expectation Propagation algorithm, defaults to 1e-3 (float)
:param powerep: power-EP parameters [$\eta$,$\delta$], defaults to [1.,1.] (list)
:rtype: GPy model class.
"""
if kernel is None:
kernel = kern.rbf(X.shape[1]) + kern.bias(X.shape[1]) + kern.white(X.shape[1])
assert isinstance(kernel,kern.kern), 'kernel is not a kern instance'
self.likelihood = likelihood
self.Y = self.likelihood.Y
self.kernel = kernel
self.X = X
self.N, self.D = self.X.shape
self.eta,self.delta = powerep
self.epsilon_ep = epsilon_ep
self.jitter = 1e-12
self.K = self.kernel.K(self.X)
model.__init__(self)
def _set_params(self,p):
self.kernel._set_params_transformed(p)
def _get_params(self):
return self.kernel._get_params_transformed()
def _get_param_names(self):
return self.kernel._get_param_names_transformed()
def approximate_likelihood(self):
self.ep_approx = Full(self.K,self.likelihood,epsilon=self.epsilon_ep,powerep=[self.eta,self.delta])
self.ep_approx.fit_EP()
def posterior_param(self):
self.K = self.kernel.K(self.X)
self.Sroot_tilde_K = np.sqrt(self.ep_approx.tau_tilde)[:,None]*self.K
B = np.eye(self.N) + np.sqrt(self.ep_approx.tau_tilde)*self.Sroot_tilde_K
#self.L = np.linalg.cholesky(B)
self.L = jitchol(B)
V,info = linalg.flapack.dtrtrs(self.L,self.Sroot_tilde_K,lower=1)
self.Sigma = self.K - np.dot(V.T,V)
self.mu = np.dot(self.Sigma,self.ep_approx.v_tilde)
def log_likelihood(self):
"""
Returns
-------
The EP approximation to the log-marginal likelihood
"""
self.posterior_param()
mu_ = self.ep_approx.v_/self.ep_approx.tau_
L1 =.5*sum(np.log(1+self.ep_approx.tau_tilde*1./self.ep_approx.tau_))-sum(np.log(np.diag(self.L)))
L2A =.5*np.sum((self.Sigma-np.diag(1./(self.ep_approx.tau_+self.ep_approx.tau_tilde))) * np.dot(self.ep_approx.v_tilde[:,None],self.ep_approx.v_tilde[None,:]))
L2B = .5*np.dot(mu_*(self.ep_approx.tau_/(self.ep_approx.tau_tilde+self.ep_approx.tau_)),self.ep_approx.tau_tilde*mu_ - 2*self.ep_approx.v_tilde)
L3 = sum(np.log(self.ep_approx.Z_hat))
return L1 + L2A + L2B + L3
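#The decomposition above follows the EP log marginal likelihood in
#Rasmussen & Williams 2006, section 3.6: L1 is the log-determinant
#term (computed via the Cholesky factor L of B), L2A and L2B collect
#the quadratic terms in the site and cavity parameters, and
#L3 = sum(log(Z_hat)) gathers the moment-matching normalisers.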
def _log_likelihood_gradients(self):
dK_dp = self.kernel.dK_dtheta(self.X)
self.dK_dp = dK_dp
aux1,info_1 = linalg.flapack.dtrtrs(self.L,np.dot(self.Sroot_tilde_K,self.ep_approx.v_tilde),lower=1)
b = self.ep_approx.v_tilde - np.sqrt(self.ep_approx.tau_tilde)*linalg.flapack.dtrtrs(self.L.T,aux1)[0]
U,info_u = linalg.flapack.dtrtrs(self.L,np.diag(np.sqrt(self.ep_approx.tau_tilde)),lower=1)
dL_dK = 0.5*(np.outer(b,b)-np.dot(U.T,U))
self.dL_dK = dL_dK
return np.array([np.sum(dK_dpi*dL_dK) for dK_dpi in dK_dp.T])
def predict(self,X):
#TODO: check output dimensions
self.posterior_param()
K_x = self.kernel.K(self.X,X)
Kxx = self.kernel.K(X)
aux1,info = linalg.flapack.dtrtrs(self.L,np.dot(self.Sroot_tilde_K,self.ep_approx.v_tilde),lower=1)
aux2,info = linalg.flapack.dtrtrs(self.L.T, aux1,lower=0)
zeta = np.sqrt(self.ep_approx.tau_tilde)*aux2
f = np.dot(K_x.T,self.ep_approx.v_tilde-zeta)
v,info = linalg.flapack.dtrtrs(self.L,np.sqrt(self.ep_approx.tau_tilde)[:,None]*K_x,lower=1)
variance = Kxx - np.dot(v.T,v)
vdiag = np.diag(variance)
y=self.likelihood.predictive_mean(f,vdiag)
return f,vdiag,y
def plot(self):
"""
Plot the fitted model: training function values, inducing points used, mean estimate and confidence intervals.
"""
if self.X.shape[1]==1:
pb.figure()
xmin,xmax = self.X.min(),self.X.max()
xmin, xmax = xmin-0.2*(xmax-xmin), xmax+0.2*(xmax-xmin)
Xnew = np.linspace(xmin,xmax,100)[:,None]
mu_f, var_f, mu_phi = self.predict(Xnew)
pb.subplot(211)
self.likelihood.plot1Da(X_new=Xnew,Mean_new=mu_f,Var_new=var_f,X_u=self.X,Mean_u=self.mu,Var_u=np.diag(self.Sigma))
pb.subplot(212)
self.likelihood.plot1Db(self.X,Xnew,mu_phi)
elif self.X.shape[1]==2:
pb.figure()
x1min,x1max = self.X[:,0].min(0),self.X[:,0].max(0)
x2min,x2max = self.X[:,1].min(0),self.X[:,1].max(0)
x1min, x1max = x1min-0.2*(x1max-x1min), x1max+0.2*(x1max-x1min)
x2min, x2max = x2min-0.2*(x2max-x2min), x2max+0.2*(x2max-x2min)
axis1 = np.linspace(x1min,x1max,50)
axis2 = np.linspace(x2min,x2max,50)
XX1, XX2 = [e.flatten() for e in np.meshgrid(axis1,axis2)]
Xnew = np.c_[XX1.flatten(),XX2.flatten()]
f,v,p = self.predict(Xnew)
self.likelihood.plot2D(self.X,Xnew,p)
else:
raise NotImplementedError, "Cannot plot GPs with more than two input dimensions"
def em(self,max_f_eval=1e4,epsilon=.1,plot_all=False): #TODO check this makes sense
"""
Fits the EP approximation and optimizes the hyperparameters iteratively until convergence is achieved.
"""
self.epsilon_em = epsilon
log_likelihood_change = self.epsilon_em + 1.
self.parameters_path = [self.kernel._get_params_transformed()]
self.approximate_likelihood()
self.site_approximations_path = [[self.ep_approx.tau_tilde,self.ep_approx.v_tilde]]
self.log_likelihood_path = [self.log_likelihood()]
iteration = 0
while log_likelihood_change > self.epsilon_em:
print 'EM iteration', iteration
self.optimize(max_f_eval = max_f_eval)
log_likelihood_new = self.log_likelihood()
log_likelihood_change = log_likelihood_new - self.log_likelihood_path[-1]
if log_likelihood_change < 0:
print 'log_likelihood decrement'
self.kernel._set_params_transformed(self.parameters_path[-1])
else:
self.approximate_likelihood()
self.log_likelihood_path.append(self.log_likelihood())
self.parameters_path.append(self.kernel._get_params_transformed())
self.site_approximations_path.append([self.ep_approx.tau_tilde,self.ep_approx.v_tilde])
iteration += 1
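A minimal usage sketch of this removed model (the probit constructor and import paths follow the old GPy layout and are assumptions; X and Y are toy data):

import numpy as np
from GPy.inference.likelihoods import probit
from GPy.models import GP_EP
X = np.random.randn(30, 1)
Y = np.where(X[:, 0] > 0., 1., -1.)[:, None]  #binary labels for the probit model
m = GP_EP(X, probit(Y))                       #rbf+bias+white kernel by default
m.approximate_likelihood()                    #run EP with fixed hyperparameters
print m.log_likelihood()                      #EP approximation to log p(y)
#m.em()                                       #optionally alternate EP and hyperparameter optimization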

View file

@@ -1,279 +0,0 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
import pylab as pb
from scipy import stats, linalg
from .. import kern
from ..inference.EP import Full
from ..inference.likelihoods import likelihood,probit,poisson,gaussian
from ..core import model
from ..util.linalg import pdinv,mdot #,jitchol
from ..util.plot import gpplot, Tango
class GP_EP2(model):
def __init__(self,X,likelihood,kernel=None,normalize_X=False,Xslices=None,epsilon_ep=1e-3,epsilon_em=.1,powerep=[1.,1.]):
"""
Simple Gaussian Process with Non-Gaussian likelihood
Arguments
---------
:param X: input observations (NxD numpy.ndarray)
:param likelihood: a GPy likelihood (likelihood class)
:param kernel: a GPy kernel, defaults to rbf+white
:param normalize_X: whether to normalize the input data before computing (predictions will be in original scales)
:type normalize_X: False|True
:param epsilon_ep: convergence criterion for the Expectation Propagation algorithm, defaults to 1e-3
:param powerep: power-EP parameters [$\eta$,$\delta$], defaults to [1.,1.] (list)
:param Xslices: how the X,Y data co-vary in the kernel (i.e. which "outputs" they correspond to). See (link:slicing)
:rtype: model object.
"""
#.. Note:: Multiple independent outputs are allowed using columns of Y #TODO add this note?
if kernel is None:
kernel = kern.rbf(X.shape[1]) + kern.bias(X.shape[1]) + kern.white(X.shape[1])
# parse arguments
self.Xslices = Xslices
assert isinstance(kernel, kern.kern)
self.likelihood = likelihood
self.kern = kernel
self.X = X
assert len(self.X.shape)==2
assert self.X.shape[0] == self.likelihood.Y.shape[0]
self.D = self.likelihood.Y.shape[1]
self.N, self.Q = self.X.shape
#here's some simple normalisation
if normalize_X:
self._Xmean = X.mean(0)[None,:]
self._Xstd = X.std(0)[None,:]
self.X = (X.copy() - self._Xmean) / self._Xstd
if hasattr(self,'Z'):
self.Z = (self.Z - self._Xmean) / self._Xstd
else:
self._Xmean = np.zeros((1,self.X.shape[1]))
self._Xstd = np.ones((1,self.X.shape[1]))
#THIS PART IS NOT NEEDED
"""
if normalize_Y:
self._Ymean = Y.mean(0)[None,:]
self._Ystd = Y.std(0)[None,:]
self.Y = (Y.copy()- self._Ymean) / self._Ystd
else:
self._Ymean = np.zeros((1,self.Y.shape[1]))
self._Ystd = np.ones((1,self.Y.shape[1]))
if self.D > self.N:
# then it's more efficient to store YYT
self.YYT = np.dot(self.Y, self.Y.T)
else:
self.YYT = None
"""
self.eta,self.delta = powerep
self.epsilon_ep = epsilon_ep
self.tau_tilde = np.ones([self.N,self.D])
self.v_tilde = np.zeros([self.N,self.D])
self.tau_ = np.ones([self.N,self.D])
self.v_ = np.zeros([self.N,self.D])
self.Z_hat = np.ones([self.N,self.D])
model.__init__(self)
def _set_params(self,p):
self.kern._set_params_transformed(p)
self.K = self.kern.K(self.X,slices1=self.Xslices)
self._ep_params()
def _get_params(self):
return self.kern._get_params_transformed()
def _get_param_names(self):
return self.kern._get_param_names_transformed()
def approximate_likelihood(self):
self.ep_approx = Full(self.K,self.likelihood,epsilon=self.epsilon_ep,powerep=[self.eta,self.delta])
self.tau_tilde, self.v_tilde, self.Z_hat, self.tau_, self.v_=self.ep_approx.fit_EP()
self._ep_params()
def _ep_params(self):
# Posterior mean and Variance computation
self.Sroot_tilde_K = np.sqrt(self.tau_tilde)*self.K
B = np.eye(self.N) + np.sqrt(self.tau_tilde.flatten())[None,:]*self.Sroot_tilde_K
self.Bi,self.L,self.Li,B_logdet = pdinv(B)
V = np.dot(self.Li,self.Sroot_tilde_K)
self.Sigma = self.K - np.dot(V.T,V) #posterior variance
self.mu = np.dot(self.Sigma,self.v_tilde) #posterior mean
# Kernel plus noise variance term
self.Kplus = self.K + np.diag(1./self.tau_tilde.flatten())
self.Kplusi,self.Lplus,self.Lplusi,self.Kplus_logdet = pdinv(self.Kplus)
# Y: EP likelihood is defined as a regression model for mu_tilde
self.Y = self.v_tilde/self.tau_tilde
self._Ymean = np.zeros((1,self.Y.shape[1]))
self._Ystd = np.ones((1,self.Y.shape[1]))
self.YYT = None #np.dot(self.Y, self.Y.T)
self.mu_ = self.v_/self.tau_
def _model_fit_term(self):
"""
Computes the model fit using YYT if it's available
"""
if self.YYT is None:
return -0.5*np.sum(np.square(np.dot(self.Lplusi,self.Y)))
else:
return -0.5*np.sum(np.multiply(self.Kplusi, self.YYT))
def _normalization_term(self):
"""
Computes the marginal likelihood normalization constants
"""
sigma_sum = 1./self.tau_ + 1./self.tau_tilde
mu_diff_2 = (self.mu_ - self.Y)**2
penalty_term = np.sum(np.log(self.Z_hat))
return penalty_term + 0.5*np.sum(np.log(sigma_sum)) + 0.5*np.sum(mu_diff_2/sigma_sum)
def log_likelihood(self):
"""
The log marginal likelihood for an EP model can be written as the log likelihood of
a regression model for a new variable Y* = v_tilde/tau_tilde, with a covariance
matrix K* = K + diag(1./tau_tilde) plus a normalization term.
"""
complexity_term = -0.5*self.D*self.Kplus_logdet
return complexity_term + self._model_fit_term() + self._normalization_term()
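#In equation form (per output column, dropping the constant term that
#the expressions above also omit): with Ystar = v_tilde/tau_tilde and
#Kstar = K + diag(1./tau_tilde),
#   log Z_EP ~ -0.5*log|Kstar| - 0.5*Ystar.T*inv(Kstar)*Ystar + normalization,
#where the normalization collects sum(log(Z_hat)) and the cavity/site
#mean-variance mismatch computed in _normalization_term above.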
def dL_dK(self):
if self.YYT is None:
alpha = np.dot(self.Kplusi,self.Y)
dL_dK = 0.5*(np.dot(alpha,alpha.T)-self.D*self.Kplusi)
else:
dL_dK = 0.5*(mdot(self.Kplusi, self.YYT, self.Kplusi) - self.D*self.Kplusi)
return dL_dK
def _log_likelihood_gradients(self):
return self.kern.dK_dtheta(partial=self.dL_dK(),X=self.X)
def predict(self,Xnew, slices=None, full_cov=False):
"""
Predict the function(s) at the new point(s) Xnew.
Arguments
---------
:param Xnew: The points at which to make a prediction
:type Xnew: np.ndarray, Nnew x self.Q
:param slices: specifies which outputs kernel(s) the Xnew correspond to (see below)
:type slices: (None, list of slice objects, list of ints)
:param full_cov: whether to return the full covariance matrix, or just the diagonal
:type full_cov: bool
:rtype: posterior mean, a Numpy array, Nnew x self.D
:rtype: posterior variance, a Numpy array, Nnew x Nnew x (self.D)
.. Note:: "slices" specifies how the the points X_new co-vary wich the training points.
- If None, the new points covary throigh every kernel part (default)
- If a list of slices, the i^th slice specifies which data are affected by the i^th kernel part
- If a list of booleans, specifying which kernel parts are active
If full_cov and self.D > 1, the return shape of var is Nnew x Nnew x self.D. If self.D == 1, the return shape is Nnew x Nnew.
This is to allow for different normalisations of the output dimensions.
"""
#normalise X values
Xnew = (Xnew.copy() - self._Xmean) / self._Xstd
mu, var, phi = self._raw_predict(Xnew, slices, full_cov)
#un-normalise
mu = mu*self._Ystd + self._Ymean
if full_cov:
if self.D==1:
var *= np.square(self._Ystd)
else:
var = var[:,:,None] * np.square(self._Ystd)
else:
if self.D==1:
var *= np.square(np.squeeze(self._Ystd))
else:
var = var[:,None] * np.square(self._Ystd)
return mu,var,phi
def _raw_predict(self,_Xnew,slices, full_cov=False):
"""Internal helper function for making predictions, does not account for normalisation"""
K_x = self.kern.K(self.X,_Xnew,slices1=self.Xslices,slices2=slices)
aux2 = mdot(self.Bi,self.Sroot_tilde_K,self.v_tilde)
zeta = np.sqrt(self.tau_tilde)*aux2
f = np.dot(K_x.T,self.v_tilde-zeta)
v = mdot(self.Li,np.sqrt(self.tau_tilde)*K_x)
if full_cov:
Kxx = self.kern.K(_Xnew,slices1=slices,slices2=slices)
var = Kxx - np.dot(v.T,v)
var_diag = np.diag(var)[:,None]
else:
Kxx = self.kern.Kdiag(_Xnew, slices=slices)
var_diag = (Kxx - np.sum(v**2,-2))[:,None]
var = var_diag
phi = self.likelihood.predictive_mean(f,var_diag)
return f, var, phi
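#Example call for a fitted model m with 1-D inputs (shapes assumed
#from the code above): Xnew = np.linspace(-3.,3.,100)[:,None] gives
#mu, var, phi = m.predict(Xnew) with mu (100 x D), var (100 x 1) and
#phi the likelihood's predictive mean at Xnew.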
def plot(self,samples=0,plot_limits=None,which_data='all',which_functions='all',resolution=None):
"""
:param samples: the number of a posteriori samples to plot
:param which_data: which of the training data to plot (default all)
:type which_data: 'all' or a slice object to slice self.X, self.Y
:param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaults to data limits
:param which_functions: which of the kernel functions to plot (additively)
:type which_functions: list of bools
:param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D
Plot the posterior of the GP.
- In one dimension, the function is plotted with a shaded region identifying two standard deviations.
- In two dimensions, a contour-plot shows the mean predicted function
- In higher dimensions, we've not implemented this yet (TODO)
Can plot only part of the data and part of the posterior functions using which_data and which_functions
"""
if which_functions=='all':
which_functions = [True]*self.kern.Nparts
if which_data=='all':
which_data = slice(None)
X = self.X[which_data,:]
Y = self.Y[which_data,:]
Xorig = X*self._Xstd + self._Xmean
Yorig = Y*self._Ystd + self._Ymean
if plot_limits is None:
xmin,xmax = Xorig.min(0),Xorig.max(0)
xmin, xmax = xmin-0.2*(xmax-xmin), xmax+0.2*(xmax-xmin)
elif len(plot_limits)==2:
xmin, xmax = plot_limits
else:
raise ValueError, "Bad limits for plotting"
if self.X.shape[1]==1:
Xnew = np.linspace(xmin,xmax,resolution or 200)[:,None]
#m,v,phi = self.predict(Xnew,slices=which_functions)
#gpplot(Xnew,m,v)
mu_f, var_f, phi_f = self.predict(Xnew,slices=which_functions)
pb.subplot(211)
self.likelihood.plot1Da(X=Xnew,mean=mu_f,var=var_f,Z=self.X,mean_Z=self.mu,var_Z=np.diag(self.Sigma))
if samples:
s = np.random.multivariate_normal(mu_f.flatten(),np.diag(var_f.flatten()),samples) #marginal (diagonal) variances only
pb.plot(Xnew.flatten(),s.T, alpha = 0.4, c='#3465a4', linewidth = 0.8)
pb.xlim(xmin,xmax)
pb.subplot(212)
self.likelihood.plot1Db(self.X,Xnew,phi_f)
elif self.X.shape[1]==2:
resolution = resolution or 50
xx,yy = np.mgrid[xmin[0]:xmax[0]:1j*resolution,xmin[1]:xmax[1]:1j*resolution]
Xtest = np.vstack((xx.flatten(),yy.flatten())).T
zz,vv,pp = self.predict(Xtest,slices=which_functions)
zz = zz.reshape(resolution,resolution)
pb.contour(xx,yy,zz,vmin=zz.min(),vmax=zz.max(),cmap=pb.cm.jet)
pb.scatter(Xorig[:,0],Xorig[:,1],40,Yorig,linewidth=0,cmap=pb.cm.jet,vmin=zz.min(),vmax=zz.max())
pb.xlim(xmin[0],xmax[0])
pb.ylim(xmin[1],xmax[1])
else:
raise NotImplementedError, "Cannot plot GPs with more than two input dimensions"

View file

@@ -6,8 +6,6 @@ from GP_regression import GP_regression
from sparse_GP_regression import sparse_GP_regression
from GPLVM import GPLVM
from warped_GP import warpedGP
from GP_EP import GP_EP
from GP_EP2 import GP_EP2
from generalized_FITC import generalized_FITC
from sparse_GPLVM import sparse_GPLVM
from uncollapsed_sparse_GP import uncollapsed_sparse_GP

View file

@@ -9,7 +9,8 @@ from .. import kern
from ..core import model
from ..util.linalg import pdinv,mdot
from ..util.plot import gpplot
from ..inference.Expectation_Propagation import FITC
#from ..inference.Expectation_Propagation import FITC
from ..inference.EP import FITC
from ..inference.likelihoods import likelihood,probit
class generalized_FITC(model):