Fixing GP_EP

This commit is contained in:
Ricardo 2013-01-25 09:30:31 +00:00
parent d286ffe633
commit b6ffb57263
8 changed files with 638 additions and 5 deletions

View file

@ -76,11 +76,10 @@ def toy_linear_1d_classification(model_type='Full', inducing=4, seed=default_see
# create simple GP model
if model_type=='Full':
m = GPy.models.simple_GP_EP(data['X'],likelihood)
m = GPy.models.GP_EP(data['X'],likelihood)
else:
# create sparse GP EP model
m = GPy.models.sparse_GP_EP(data['X'],likelihood=likelihood,inducing=inducing,ep_proxy=model_type)
m.constrain_positive('var')
m.constrain_positive('len')

39
GPy/examples/ep_fix.py Normal file
View file

@ -0,0 +1,39 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
"""
Simple Gaussian Processes classification
"""
import pylab as pb
import numpy as np
import GPy
pb.ion()
default_seed=10000
model_type='Full'
inducing=4
seed=default_seed
"""Simple 1D classification example.
:param model_type: type of model to fit ['Full', 'FITC', 'DTC'].
:param seed : seed value for data generation (default is 4).
:type seed: int
:param inducing : number of inducing variables (only used for 'FITC' or 'DTC').
:type inducing: int
"""
data = GPy.util.datasets.toy_linear_1d_classification(seed=seed)
likelihood = GPy.inference.likelihoods.probit(data['Y'][:, 0:1])
m = GPy.models.GP_EP2(data['X'],likelihood)
#m.constrain_positive('var')
#m.constrain_positive('len')
#m.tie_param('lengthscale')
m.approximate_likelihood()
# Optimize and plot
#m.optimize()
#m.em(plot_all=False) # EM algorithm
m.plot()
print(m)

314
GPy/inference/EP.py Normal file
View file

@ -0,0 +1,314 @@
import numpy as np
import random
import pylab as pb #TODO erase me
from scipy import stats, linalg
from .likelihoods import likelihood
from ..core import model
from ..util.linalg import pdinv,mdot,jitchol
from ..util.plot import gpplot
from .. import kern
class EP:
def __init__(self,covariance,likelihood,Kmn=None,Knn_diag=None,epsilon=1e-3,powerep=[1.,1.]):
"""
Expectation Propagation
Arguments
---------
X : input observations
likelihood : Output's likelihood (likelihood class)
kernel : a GPy kernel (kern class)
inducing : Either an array specifying the inducing points location or a sacalar defining their number. None value for using a non-sparse model is used.
powerep : Power-EP parameters (eta,delta) - 2x1 numpy array (floats)
epsilon : Convergence criterion, maximum squared difference allowed between mean updates to stop iterations (float)
"""
self.likelihood = likelihood
assert covariance.shape[0] == covariance.shape[1]
if Kmn is not None:
self.Kmm = covariance
self.Kmn = Kmn
self.M = self.Kmn.shape[0]
self.N = self.Kmn.shape[1]
assert self.M < self.N, 'The number of inducing inputs must be smaller than the number of observations'
else:
self.K = covariance
self.N = self.K.shape[0]
if Knn_diag is not None:
self.Knn_diag = Knn_diag
assert len(Knn_diag) == self.N, 'Knn_diagonal has size different from N'
self.epsilon = epsilon
self.eta, self.delta = powerep
self.jitter = 1e-12
"""
Initial values - Likelihood approximation parameters:
p(y|f) = t(f|tau_tilde,v_tilde)
"""
self.tau_tilde = np.zeros(self.N)
self.v_tilde = np.zeros(self.N)
def restart_EP(self):
"""
Set the EP approximation to initial state
"""
self.tau_tilde = np.zeros(self.N)
self.v_tilde = np.zeros(self.N)
self.mu = np.zeros(self.N)
class Full(EP):
def fit_EP(self):
"""
The expectation-propagation algorithm.
For nomenclature see Rasmussen & Williams 2006 (pag. 52-60)
"""
#Prior distribution parameters: p(f|X) = N(f|0,K)
#self.K = self.kernel.K(self.X,self.X)
#Initial values - Posterior distribution parameters: q(f|X,Y) = N(f|mu,Sigma)
self.mu=np.zeros(self.N)
self.Sigma=self.K.copy()
"""
Initial values - Cavity distribution parameters:
q_(f|mu_,sigma2_) = Product{q_i(f|mu_i,sigma2_i)}
sigma_ = 1./tau_
mu_ = v_/tau_
"""
self.tau_ = np.empty(self.N,dtype=float)
self.v_ = np.empty(self.N,dtype=float)
#Initial values - Marginal moments
z = np.empty(self.N,dtype=float)
self.Z_hat = np.empty(self.N,dtype=float)
phi = np.empty(self.N,dtype=float)
mu_hat = np.empty(self.N,dtype=float)
sigma2_hat = np.empty(self.N,dtype=float)
self.mu_hat = mu_hat #TODO erase me
self.sigma2_hat = sigma2_hat #TODO erase me
#Approximation
epsilon_np1 = self.epsilon + 1.
epsilon_np2 = self.epsilon + 1.
self.iterations = 0
self.np1 = [self.tau_tilde.copy()]
self.np2 = [self.v_tilde.copy()]
while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon:
update_order = np.arange(self.N)
#random.shuffle(update_order) #TODO uncomment
for i in update_order:
#Cavity distribution parameters
self.tau_[i] = 1./self.Sigma[i,i] - self.eta*self.tau_tilde[i]
self.v_[i] = self.mu[i]/self.Sigma[i,i] - self.eta*self.v_tilde[i]
#Marginal moments
self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.likelihood.moments_match(i,self.tau_[i],self.v_[i])
self.mu_hat[i] = mu_hat[i] #TODO erase me
self.sigma2_hat[i] = sigma2_hat[i] #TODO erase me
#if i == 3:
# a = b
#Site parameters update
Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./self.Sigma[i,i])
Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - self.mu[i]/self.Sigma[i,i])
print Delta_tau
self.tau_tilde[i] = self.tau_tilde[i] + Delta_tau
self.v_tilde[i] = self.v_tilde[i] + Delta_v
#Posterior distribution parameters update
si=self.Sigma[:,i].reshape(self.N,1)
self.Sigma = self.Sigma - Delta_tau/(1.+ Delta_tau*self.Sigma[i,i])*np.dot(si,si.T)
self.mu = np.dot(self.Sigma,self.v_tilde)
self.iterations += 1
#Sigma recomptutation with Cholesky decompositon
Sroot_tilde_K = np.sqrt(self.tau_tilde)[:,None]*(self.K)
B = np.eye(self.N) + np.sqrt(self.tau_tilde)[None,:]*Sroot_tilde_K
L = jitchol(B)
V,info = linalg.flapack.dtrtrs(L,Sroot_tilde_K,lower=1)
self.Sigma = self.K - np.dot(V.T,V)
self.mu = np.dot(self.Sigma,self.v_tilde)
epsilon_np1 = sum((self.tau_tilde-self.np1[-1])**2)/self.N
epsilon_np2 = sum((self.v_tilde-self.np2[-1])**2)/self.N
self.np1.append(self.tau_tilde.copy())
self.np2.append(self.v_tilde.copy())
class DTC(EP):
def fit_EP(self):
"""
The expectation-propagation algorithm with sparse pseudo-input.
For nomenclature see ... 2013.
"""
"""
Prior approximation parameters:
q(f|X) = int_{df}{N(f|KfuKuu_invu,diag(Kff-Qff)*N(u|0,Kuu)} = N(f|0,Sigma0)
Sigma0 = Qnn = Knm*Kmmi*Kmn
"""
self.Kmmi, self.Kmm_hld = pdinv(self.Kmm)
self.KmnKnm = np.dot(self.Kmn, self.Kmn.T)
self.KmmiKmn = np.dot(self.Kmmi,self.Kmn)
self.Qnn_diag = np.sum(self.Kmn*self.KmmiKmn,-2)
self.LLT0 = self.Kmm.copy()
"""
Posterior approximation: q(f|y) = N(f| mu, Sigma)
Sigma = Diag + P*R.T*R*P.T + K
mu = w + P*gamma
"""
self.mu = np.zeros(self.N)
self.LLT = self.Kmm.copy()
self.Sigma_diag = self.Qnn_diag.copy()
"""
Initial values - Cavity distribution parameters:
q_(g|mu_,sigma2_) = Product{q_i(g|mu_i,sigma2_i)}
sigma_ = 1./tau_
mu_ = v_/tau_
"""
self.tau_ = np.empty(self.N,dtype=float)
self.v_ = np.empty(self.N,dtype=float)
#Initial values - Marginal moments
z = np.empty(self.N,dtype=float)
self.Z_hat = np.empty(self.N,dtype=float)
phi = np.empty(self.N,dtype=float)
mu_hat = np.empty(self.N,dtype=float)
sigma2_hat = np.empty(self.N,dtype=float)
#Approximation
epsilon_np1 = 1
epsilon_np2 = 1
self.iterations = 0
self.np1 = [self.tau_tilde.copy()]
self.np2 = [self.v_tilde.copy()]
while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon:
update_order = np.arange(self.N)
random.shuffle(update_order)
for i in update_order:
#Cavity distribution parameters
self.tau_[i] = 1./self.Sigma_diag[i] - self.eta*self.tau_tilde[i]
self.v_[i] = self.mu[i]/self.Sigma_diag[i] - self.eta*self.v_tilde[i]
#Marginal moments
self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.likelihood.moments_match(i,self.tau_[i],self.v_[i])
#Site parameters update
Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./self.Sigma_diag[i])
Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - self.mu[i]/self.Sigma_diag[i])
self.tau_tilde[i] = self.tau_tilde[i] + Delta_tau
self.v_tilde[i] = self.v_tilde[i] + Delta_v
#Posterior distribution parameters update
self.LLT = self.LLT + np.outer(self.Kmn[:,i],self.Kmn[:,i])*Delta_tau
L = jitchol(self.LLT)
V,info = linalg.flapack.dtrtrs(L,self.Kmn,lower=1)
self.Sigma_diag = np.sum(V*V,-2)
si = np.sum(V.T*V[:,i],-1)
self.mu = self.mu + (Delta_v-Delta_tau*self.mu[i])*si
self.iterations += 1
#Sigma recomputation with Cholesky decompositon
self.LLT0 = self.LLT0 + np.dot(self.Kmn*self.tau_tilde[None,:],self.Kmn.T)
self.L = jitchol(self.LLT)
V,info = linalg.flapack.dtrtrs(L,self.Kmn,lower=1)
V2,info = linalg.flapack.dtrtrs(L.T,V,lower=0)
self.Sigma_diag = np.sum(V*V,-2)
Knmv_tilde = np.dot(self.Kmn,self.v_tilde)
self.mu = np.dot(V2.T,Knmv_tilde)
epsilon_np1 = sum((self.tau_tilde-self.np1[-1])**2)/self.N
epsilon_np2 = sum((self.v_tilde-self.np2[-1])**2)/self.N
self.np1.append(self.tau_tilde.copy())
self.np2.append(self.v_tilde.copy())
class FITC(EP):
def fit_EP(self):
"""
The expectation-propagation algorithm with sparse pseudo-input.
For nomenclature see Naish-Guzman and Holden, 2008.
"""
"""
Prior approximation parameters:
q(f|X) = int_{df}{N(f|KfuKuu_invu,diag(Kff-Qff)*N(u|0,Kuu)} = N(f|0,Sigma0)
Sigma0 = diag(Knn-Qnn) + Qnn, Qnn = Knm*Kmmi*Kmn
"""
self.Kmmi, self.Kmm_hld = pdinv(self.Kmm)
self.P0 = self.Kmn.T
self.KmnKnm = np.dot(self.P0.T, self.P0)
self.KmmiKmn = np.dot(self.Kmmi,self.P0.T)
self.Qnn_diag = np.sum(self.P0.T*self.KmmiKmn,-2)
self.Diag0 = self.Knn_diag - self.Qnn_diag
self.R0 = jitchol(self.Kmmi).T
"""
Posterior approximation: q(f|y) = N(f| mu, Sigma)
Sigma = Diag + P*R.T*R*P.T + K
mu = w + P*gamma
"""
self.w = np.zeros(self.N)
self.gamma = np.zeros(self.M)
self.mu = np.zeros(self.N)
self.P = self.P0.copy()
self.R = self.R0.copy()
self.Diag = self.Diag0.copy()
self.Sigma_diag = self.Knn_diag
"""
Initial values - Cavity distribution parameters:
q_(g|mu_,sigma2_) = Product{q_i(g|mu_i,sigma2_i)}
sigma_ = 1./tau_
mu_ = v_/tau_
"""
self.tau_ = np.empty(self.N,dtype=float)
self.v_ = np.empty(self.N,dtype=float)
#Initial values - Marginal moments
z = np.empty(self.N,dtype=float)
self.Z_hat = np.empty(self.N,dtype=float)
phi = np.empty(self.N,dtype=float)
mu_hat = np.empty(self.N,dtype=float)
sigma2_hat = np.empty(self.N,dtype=float)
#Approximation
epsilon_np1 = 1
epsilon_np2 = 1
self.iterations = 0
self.np1 = [self.tau_tilde.copy()]
self.np2 = [self.v_tilde.copy()]
while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon:
update_order = np.arange(self.N)
random.shuffle(update_order)
for i in update_order:
#Cavity distribution parameters
self.tau_[i] = 1./self.Sigma_diag[i] - self.eta*self.tau_tilde[i]
self.v_[i] = self.mu[i]/self.Sigma_diag[i] - self.eta*self.v_tilde[i]
#Marginal moments
self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.likelihood.moments_match(i,self.tau_[i],self.v_[i])
#Site parameters update
Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./self.Sigma_diag[i])
Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - self.mu[i]/self.Sigma_diag[i])
self.tau_tilde[i] = self.tau_tilde[i] + Delta_tau
self.v_tilde[i] = self.v_tilde[i] + Delta_v
#Posterior distribution parameters update
dtd1 = Delta_tau*self.Diag[i] + 1.
dii = self.Diag[i]
self.Diag[i] = dii - (Delta_tau * dii**2.)/dtd1
pi_ = self.P[i,:].reshape(1,self.M)
self.P[i,:] = pi_ - (Delta_tau*dii)/dtd1 * pi_
Rp_i = np.dot(self.R,pi_.T)
RTR = np.dot(self.R.T,np.dot(np.eye(self.M) - Delta_tau/(1.+Delta_tau*self.Sigma_diag[i]) * np.dot(Rp_i,Rp_i.T),self.R))
self.R = jitchol(RTR).T
self.w[i] = self.w[i] + (Delta_v - Delta_tau*self.w[i])*dii/dtd1
self.gamma = self.gamma + (Delta_v - Delta_tau*self.mu[i])*np.dot(RTR,self.P[i,:].T)
self.RPT = np.dot(self.R,self.P.T)
self.Sigma_diag = self.Diag + np.sum(self.RPT.T*self.RPT.T,-1)
self.mu = self.w + np.dot(self.P,self.gamma)
self.iterations += 1
#Sigma recomptutation with Cholesky decompositon
self.Diag = self.Diag0/(1.+ self.Diag0 * self.tau_tilde)
self.P = (self.Diag / self.Diag0)[:,None] * self.P0
self.RPT0 = np.dot(self.R0,self.P0.T)
L = jitchol(np.eye(self.M) + np.dot(self.RPT0,(1./self.Diag0 - self.Diag/(self.Diag0**2))[:,None]*self.RPT0.T))
self.R,info = linalg.flapack.dtrtrs(L,self.R0,lower=1)
self.RPT = np.dot(self.R,self.P.T)
self.Sigma_diag = self.Diag + np.sum(self.RPT.T*self.RPT.T,-1)
self.w = self.Diag * self.v_tilde
self.gamma = np.dot(self.R.T, np.dot(self.RPT,self.v_tilde))
self.mu = self.w + np.dot(self.P,self.gamma)
epsilon_np1 = sum((self.tau_tilde-self.np1[-1])**2)/self.N
epsilon_np2 = sum((self.v_tilde-self.np2[-1])**2)/self.N
self.np1.append(self.tau_tilde.copy())
self.np2.append(self.v_tilde.copy())

View file

@ -116,7 +116,7 @@ class Full(EP_base):
self.np1.append(self.tau_tilde.copy())
self.np2.append(self.v_tilde.copy())
if messages:
print "EP iteration %i, epsiolon %d"%(self.iterations,epsilon_np1)
print "EP iteration %i, epsilon %d"%(self.iterations,epsilon_np1)
class FITC(EP_base):
"""

View file

@ -32,7 +32,7 @@ class likelihood:
"""
assert X_new.shape[1] == 1, 'Number of dimensions must be 1'
gpplot(X_new,Mean_new,Var_new)
pb.errorbar(X_u,Mean_u,2*np.sqrt(Var_u),fmt='r+')
pb.errorbar(X_u.flatten(),Mean_u.flatten(),2*np.sqrt(Var_u.flatten()),fmt='r+')
pb.plot(X_u,Mean_u,'ro')
def plot2D(self,X,X_new,F_new,U=None):

View file

@ -57,7 +57,7 @@ class GP_EP(model):
def posterior_param(self):
self.K = self.kernel.K(self.X)
self.Sroot_tilde_K = np.sqrt(self.ep_approx.tau_tilde)[:,None]*self.K
B = np.eye(self.N) + np.sqrt(self.ep_approx.tau_tilde)[None,:]*self.Sroot_tilde_K
B = np.eye(self.N) + np.sqrt(self.ep_approx.tau_tilde)*self.Sroot_tilde_K
#self.L = np.linalg.cholesky(B)
self.L = jitchol(B)
V,info = linalg.flapack.dtrtrs(self.L,self.Sroot_tilde_K,lower=1)

280
GPy/models/GP_EP2.py Normal file
View file

@ -0,0 +1,280 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
import pylab as pb
from scipy import stats, linalg
from .. import kern
from ..inference.EP import Full
from ..inference.likelihoods import likelihood,probit,poisson,gaussian
from ..core import model
from ..util.linalg import pdinv,mdot #,jitchol
from ..util.plot import gpplot, Tango
class GP_EP2(model):
def __init__(self,X,likelihood,kernel=None,normalize_X=False,Xslices=None,epsilon_ep=1e-3,epsion_em=.1,powerep=[1.,1.]):
"""
Simple Gaussian Process with Non-Gaussian likelihood
Arguments
---------
:param X: input observations (NxD numpy.darray)
:param likelihood: a GPy likelihood (likelihood class)
:param kernel: a GPy kernel, defaults to rbf+white
:param normalize_X: whether to normalize the input data before computing (predictions will be in original scales)
:type normalize_X: False|True
:param epsilon_ep: convergence criterion for the Expectation Propagation algorithm, defaults to 1e-3
:param powerep: power-EP parameters [$\eta$,$\delta$], defaults to [1.,1.] (list)
:param Xslices: how the X,Y data co-vary in the kernel (i.e. which "outputs" they correspond to). See (link:slicing)
:rtype: model object.
"""
#.. Note:: Multiple independent outputs are allowed using columns of Y #TODO add this note?
if kernel is None:
kernel = kern.rbf(X.shape[1]) + kern.bias(X.shape[1]) + kern.white(X.shape[1])
# parse arguments
self.Xslices = Xslices
assert isinstance(kernel, kern.kern)
self.likelihood = likelihood
#self.Y = self.likelihood.Y #we might not need this
self.kern = kernel
self.X = X
assert len(self.X.shape)==2
#assert len(self.Y.shape)==2
#assert self.X.shape[0] == self.Y.shape[0]
#self.N, self.D = self.Y.shape
self.D = 1
self.N, self.Q = self.X.shape
#here's some simple normalisation
if normalize_X:
self._Xmean = X.mean(0)[None,:]
self._Xstd = X.std(0)[None,:]
self.X = (X.copy() - self._Xmean) / self._Xstd
if hasattr(self,'Z'):
self.Z = (self.Z - self._Xmean) / self._Xstd
else:
self._Xmean = np.zeros((1,self.X.shape[1]))
self._Xstd = np.ones((1,self.X.shape[1]))
#THIS PART IS NOT NEEDED
"""
if normalize_Y:
self._Ymean = Y.mean(0)[None,:]
self._Ystd = Y.std(0)[None,:]
self.Y = (Y.copy()- self._Ymean) / self._Ystd
else:
self._Ymean = np.zeros((1,self.Y.shape[1]))
self._Ystd = np.ones((1,self.Y.shape[1]))
if self.D > self.N:
# then it's more efficient to store YYT
self.YYT = np.dot(self.Y, self.Y.T)
else:
self.YYT = None
"""
self.eta,self.delta = powerep
self.epsilon_ep = epsilon_ep
self.tau_tilde = np.zeros([self.N,self.D])
self.v_tilde = np.zeros([self.N,self.D])
model.__init__(self)
def _set_params(self,p):
self.kern._set_params_transformed(p)
self.K = self.kern.K(self.X,slices1=self.Xslices)
self.posterior_params()
def _get_params(self):
return self.kern._get_params_transformed()
def _get_param_names(self):
return self.kern._get_param_names_transformed()
def approximate_likelihood(self):
self.ep_approx = Full(self.K,self.likelihood,epsilon=self.epsilon_ep,powerep=[self.eta,self.delta])
self.ep_approx.fit_EP()
self.tau_tilde = self.ep_approx.tau_tilde[:,None]
self.v_tilde = self.ep_approx.tau_tilde[:,None]
self.posterior_params()
self.Y = self.v_tilde/self.tau_tilde
self._Ymean = np.zeros((1,self.Y.shape[1]))
self._Ystd = np.ones((1,self.Y.shape[1]))
#self.YYT = np.dot(self.Y, self.Y.T)
def posterior_params(self):
self.Sroot_tilde_K = np.sqrt(self.tau_tilde.flatten())[:,None]*self.K
B = np.eye(self.N) + np.sqrt(self.tau_tilde.flatten())[None,:]*self.Sroot_tilde_K
self.Bi,self.L,self.Li,B_logdet = pdinv(B)
V = np.dot(self.Li,self.Sroot_tilde_K)
#V,info = linalg.flapack.dtrtrs(self.L,self.Sroot_tilde_K,lower=1)
self.Sigma = self.K - np.dot(V.T,V)
self.mu = np.dot(self.Sigma,self.v_tilde.flatten())
#def _model_fit_term(self):
# """
# Computes the model fit using YYT if it's available
# """
# if self.YYT is None:
# return -0.5*np.sum(np.square(np.dot(self.Li,self.Y)))
# else:
# return -0.5*np.sum(np.multiply(self.Ki, self.YYT))
def log_likelihood(self):
mu_ = self.ep_approx.v_/self.ep_approx.tau_
L1 =.5*sum(np.log(1+self.ep_approx.tau_tilde*1./self.ep_approx.tau_))-sum(np.log(np.diag(self.L)))
L2A =.5*np.sum((self.Sigma-np.diag(1./(self.ep_approx.tau_+self.ep_approx.tau_tilde))) * np.dot(self.ep_approx.v_tilde[:,None],self.ep_approx.v_tilde[None,:]))
L2B = .5*np.dot(mu_*(self.ep_approx.tau_/(self.ep_approx.tau_tilde+self.ep_approx.tau_)),self.ep_approx.tau_tilde*mu_ - 2*self.ep_approx.v_tilde)
L3 = sum(np.log(self.ep_approx.Z_hat))
return L1 + L2A + L2B + L3
def dL_dK(self): #FIXME
if self.YYT is None:
alpha = np.dot(self.Ki,self.Y)
dL_dK = 0.5*(np.dot(alpha,alpha.T)-self.D*self.Ki)
else:
dL_dK = 0.5*(mdot(self.Ki, self.YYT, self.Ki) - self.D*self.Ki)
return dL_dK
def _log_likelihood_gradients(self): #FIXME
return self.kern.dK_dtheta(partial=self.dL_dK(),X=self.X)
def predict(self,Xnew, slices=None, full_cov=False):
"""
Predict the function(s) at the new point(s) Xnew.
Arguments
---------
:param Xnew: The points at which to make a prediction
:type Xnew: np.ndarray, Nnew x self.Q
:param slices: specifies which outputs kernel(s) the Xnew correspond to (see below)
:type slices: (None, list of slice objects, list of ints)
:param full_cov: whether to return the folll covariance matrix, or just the diagonal
:type full_cov: bool
:rtype: posterior mean, a Numpy array, Nnew x self.D
:rtype: posterior variance, a Numpy array, Nnew x Nnew x (self.D)
.. Note:: "slices" specifies how the the points X_new co-vary wich the training points.
- If None, the new points covary throigh every kernel part (default)
- If a list of slices, the i^th slice specifies which data are affected by the i^th kernel part
- If a list of booleans, specifying which kernel parts are active
If full_cov and self.D > 1, the return shape of var is Nnew x Nnew x self.D. If self.D == 1, the return shape is Nnew x Nnew.
This is to allow for different normalisations of the output dimensions.
"""
#normalise X values
Xnew = (Xnew.copy() - self._Xmean) / self._Xstd
mu, var, phi = self._raw_predict(Xnew, slices, full_cov)
#un-normalise
mu = mu*self._Ystd + self._Ymean
if full_cov:
if self.D==1:
var *= np.square(self._Ystd)
else:
var = var[:,:,None] * np.square(self._Ystd)
else:
if self.D==1:
var *= np.square(np.squeeze(self._Ystd))
else:
var = var[:,None] * np.square(self._Ystd)
return mu,var,phi
def _raw_predict(self,_Xnew,slices, full_cov=False):
"""Internal helper function for making predictions, does not account for normalisation"""
"""
Kx = self.kern.K(self.X,_Xnew, slices1=self.Xslices,slices2=slices)
mu = np.dot(np.dot(Kx.T,self.Ki),self.Y)
KiKx = np.dot(self.Ki,Kx)
if full_cov:
Kxx = self.kern.K(_Xnew, slices1=slices,slices2=slices)
var = Kxx - np.dot(KiKx.T,Kx)
else:
Kxx = self.kern.Kdiag(_Xnew, slices=slices)
var = Kxx - np.sum(np.multiply(KiKx,Kx),0)
return mu, var
"""
K_x = self.kern.K(self.X,_Xnew)
Kxx = self.kern.K(_Xnew)
#aux1,info = linalg.flapack.dtrtrs(self.L,np.dot(self.Sroot_tilde_K,self.ep_approx.v_tilde),lower=1)
#aux2,info = linalg.flapack.dtrtrs(self.L.T, aux1,lower=0)
#aux2 = mdot(self.Li.T,self.Li,self.Sroot_tilde_K,self.ep_approx.v_tilde)
aux2 = mdot(self.Bi,self.Sroot_tilde_K,self.ep_approx.v_tilde)
zeta = np.sqrt(self.ep_approx.tau_tilde)*aux2
f = np.dot(K_x.T,self.ep_approx.v_tilde-zeta)
#v,info = linalg.flapack.dtrtrs(self.L,np.sqrt(self.ep_approx.tau_tilde)[:,None]*K_x,lower=1)
v = mdot(self.Li,np.sqrt(self.ep_approx.tau_tilde)[:,None]*K_x)
variance = Kxx - np.dot(v.T,v)
vdiag = np.diag(variance)
y=self.likelihood.predictive_mean(f,vdiag)
return f,vdiag,y
def plot(self,samples=0,plot_limits=None,which_data='all',which_functions='all',resolution=None):
"""
:param samples: the number of a posteriori samples to plot
:param which_data: which if the training data to plot (default all)
:type which_data: 'all' or a slice object to slice self.X, self.Y
:param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaluts to data limits
:param which_functions: which of the kernel functions to plot (additively)
:type which_functions: list of bools
:param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D
Plot the posterior of the GP.
- In one dimension, the function is plotted with a shaded region identifying two standard deviations.
- In two dimsensions, a contour-plot shows the mean predicted function
- In higher dimensions, we've no implemented this yet !TODO!
Can plot only part of the data and part of the posterior functions using which_data and which_functions
"""
if which_functions=='all':
which_functions = [True]*self.kern.Nparts
if which_data=='all':
which_data = slice(None)
X = self.X[which_data,:]
Y = self.Y[which_data,:]
Xorig = X*self._Xstd + self._Xmean
Yorig = Y*self._Ystd + self._Ymean
if plot_limits is None:
xmin,xmax = Xorig.min(0),Xorig.max(0)
xmin, xmax = xmin-0.2*(xmax-xmin), xmax+0.2*(xmax-xmin)
elif len(plot_limits)==2:
xmin, xmax = plot_limits
else:
raise ValueError, "Bad limits for plotting"
if self.X.shape[1]==1:
Xnew = np.linspace(xmin,xmax,resolution or 200)[:,None]
#m,v,phi = self.predict(Xnew,slices=which_functions)
#gpplot(Xnew,m,v)
mu_f, var_f, phi_f = self.predict(Xnew,slices=which_functions)
pb.subplot(211)
self.likelihood.plot1Da(X_new=Xnew,Mean_new=mu_f,Var_new=var_f,X_u=self.X,Mean_u=self.mu,Var_u=np.diag(self.Sigma))
if samples:
s = np.random.multivariate_normal(m.flatten(),v,samples)
pb.plot(Xnew.flatten(),s.T, alpha = 0.4, c='#3465a4', linewidth = 0.8)
pb.xlim(xmin,xmax)
pb.subplot(212)
self.likelihood.plot1Db(self.X,Xnew,phi_f)
elif self.X.shape[1]==2:
resolution = 50 or resolution
xx,yy = np.mgrid[xmin[0]:xmax[0]:1j*resolution,xmin[1]:xmax[1]:1j*resolution]
Xtest = np.vstack((xx.flatten(),yy.flatten())).T
zz,vv = self.predict(Xtest,slices=which_functions)
zz = zz.reshape(resolution,resolution)
pb.contour(xx,yy,zz,vmin=zz.min(),vmax=zz.max(),cmap=pb.cm.jet)
pb.scatter(Xorig[:,0],Xorig[:,1],40,Yorig,linewidth=0,cmap=pb.cm.jet,vmin=zz.min(),vmax=zz.max())
pb.xlim(xmin[0],xmax[0])
pb.ylim(xmin[1],xmax[1])
else:
raise NotImplementedError, "Cannot plot GPs with more than two input dimensions"

View file

@ -7,6 +7,7 @@ from sparse_GP_regression import sparse_GP_regression
from GPLVM import GPLVM
from warped_GP import warpedGP
from GP_EP import GP_EP
from GP_EP2 import GP_EP2
from generalized_FITC import generalized_FITC
from sparse_GPLVM import sparse_GPLVM
from uncollapsed_sparse_GP import uncollapsed_sparse_GP