Merge branch 'newGP' of github.com:SheffieldML/GPy into newGP

Alan Saul 2013-01-31 11:04:56 +00:00
commit 6b62ae960a
6 changed files with 163 additions and 170 deletions

File 1 of 6

@@ -43,6 +43,6 @@ print m.checkgrad()
 # Optimize and plot
 m.optimize()
 #m.em(plot_all=False) # EM algorithm
-m.plot()
+m.plot(samples=4)
 print(m)

File 2 of 6

@@ -14,6 +14,7 @@ pb.ion()
 N = 500
 M = 5
+pb.close('all')
 
 ######################################
 ## 1 dimensional example
@@ -31,18 +32,29 @@ noise = GPy.kern.white(1)
 kernel = rbf + noise
 # create simple GP model
-#m1 = GPy.models.sparse_GP(X, Y, kernel, M=M)
-m1 = GPy.models.sparse_GP(X,Y=None, kernel=kernel, M=M,likelihood= likelihood)
-print m1.checkgrad()
+#m = GPy.models.sparse_GP(X,Y=None, kernel=kernel, M=M,likelihood= likelihood)
 # contrain all parameters to be positive
-m1.constrain_positive('(variance|lengthscale|precision)')
-#m1.constrain_positive('(variance|lengthscale)')
-#m1.constrain_fixed('prec',10.)
-#check gradient FIXME unit test please
-# optimize and plot
-m1.optimize('tnc', messages = 1)
-m1.plot()
-# print(m1)
+#m.constrain_fixed('prec',100.)
+m = GPy.models.sparse_GP(X, Y, kernel, M=M)
+m.ensure_default_constraints()
+#if not isinstance(m.likelihood,GPy.inference.likelihoods.gaussian):
+#    m.approximate_likelihood()
+print m.checkgrad()
+m.optimize('tnc', messages = 1)
+m.plot(samples=3)
+print m
+
+n = GPy.models.sparse_GP(X,Y=None, kernel=kernel, M=M,likelihood= likelihood)
+n.ensure_default_constraints()
+if not isinstance(n.likelihood,GPy.inference.likelihoods.gaussian):
+    n.approximate_likelihood()
+print n.checkgrad()
+pb.figure()
+n.plot()
+
+"""
+m = GPy.models.sparse_GP_regression(X, Y, kernel, M=M)
+m.ensure_default_constraints()
+print m.checkgrad()
+"""

File 3 of 6

@@ -136,7 +136,7 @@ class DTC(EP):
         q(f|X) = int_{df}{N(f|KfuKuu_invu,diag(Kff-Qff)*N(u|0,Kuu)} = N(f|0,Sigma0)
         Sigma0 = Qnn = Knm*Kmmi*Kmn
         """
-        self.Kmmi, self.Kmm_hld = pdinv(self.Kmm)
+        self.Kmmi, self.Lm, self.Lmi, self.Kmm_logdet = pdinv(self.Kmm)
         self.KmnKnm = np.dot(self.Kmn, self.Kmn.T)
         self.KmmiKmn = np.dot(self.Kmmi,self.Kmn)
         self.Qnn_diag = np.sum(self.Kmn*self.KmmiKmn,-2)
@@ -222,7 +222,7 @@ class FITC(EP):
         q(f|X) = int_{df}{N(f|KfuKuu_invu,diag(Kff-Qff)*N(u|0,Kuu)} = N(f|0,Sigma0)
         Sigma0 = diag(Knn-Qnn) + Qnn, Qnn = Knm*Kmmi*Kmn
         """
-        self.Kmmi, self.Kmm_hld = pdinv(self.Kmm)
+        self.Kmmi, self.Lm, self.Lmi, self.Kmm_logdet = pdinv(self.Kmm)
         self.P0 = self.Kmn.T
         self.KmnKnm = np.dot(self.P0.T, self.P0)
         self.KmmiKmn = np.dot(self.Kmmi,self.P0.T)
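Both call sites above now unpack four values from pdinv instead of an inverse plus a half log-determinant. A minimal numpy/scipy sketch of a helper with that contract, written as an illustrative stand-in rather than GPy's actual util.linalg.pdinv:

    import numpy as np
    from scipy import linalg

    def pdinv_like(A):
        """Return (A_inv, L, L_inv, log|A|) for symmetric positive definite A."""
        L = linalg.cholesky(A, lower=True)                        # A = L L^T
        Li = linalg.solve_triangular(L, np.eye(A.shape[0]), lower=True)
        Ai = np.dot(Li.T, Li)                                     # A^{-1} = L^{-T} L^{-1}
        logdet = 2.0 * np.sum(np.log(np.diag(L)))                 # log|A|
        return Ai, L, Li, logdet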

File 4 of 6

@@ -83,12 +83,20 @@ class probit(likelihood):
         var = var.flatten()
         return stats.norm.cdf(mu/np.sqrt(1+var))
 
+    def predictive_var(self,mu,var):
+        p=self.predictive_mean(mu,var)
+        return p*(1-p)
+
     def _log_likelihood_gradients():
         raise NotImplementedError
 
-    def plot(self,X,phi,X_obs,Z=None):
+    def plot(self,X,mu,var,phi,X_obs,Z=None,samples=0):
         assert X_obs.shape[1] == 1, 'Number of dimensions must be 1'
-        gpplot(X,phi,np.zeros(X.shape[0]))
+        phi_var = self.predictive_var(mu,var)
+        gpplot(X,phi,phi_var)
+        if samples:
+            phi_samples = np.vstack([np.random.binomial(1,phi.flatten()) for s in range(samples)])
+            pb.plot(X,phi_samples.T,'x', alpha = 0.4, c='#3465a4' )
         pb.plot(X_obs,(self.Y+1)/2,'kx',mew=1.5)
         if Z is not None:
             pb.plot(Z,Z*0+.5,'r|',mew=1.5,markersize=12)
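For reference, the probit predictive quantities used above are the Bernoulli moments of p = Phi(mu/sqrt(1+var)). A small self-contained sketch (the function name is illustrative only):

    import numpy as np
    from scipy import stats

    def probit_predictive(mu, var):
        # P(y=1) after integrating the probit link over a Gaussian latent N(mu, var)
        p = stats.norm.cdf(mu / np.sqrt(1. + var))
        # Bernoulli variance p(1-p), as predictive_var above computes
        return p, p * (1. - p)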
@@ -164,16 +172,22 @@ class poisson(likelihood):
         sigma2_hat = m2 - mu_hat**2 # Second central moment
         return float(Z_hat), float(mu_hat), float(sigma2_hat)
 
-    def predictive_mean(self,mu,variance):
+    def predictive_mean(self,mu,var):
         return np.exp(mu*self.scale + self.location)
 
+    def predictive_var(self,mu,var):
+        return predictive_mean(mu,var)
+
     def _log_likelihood_gradients():
         raise NotImplementedError
 
-    def plot(self,X,phi,X_obs,Z=None):
+    def plot(self,X,mu,var,phi,X_obs,Z=None,samples=0):
         assert X_obs.shape[1] == 1, 'Number of dimensions must be 1'
-        gpplot(X,phi,np.zeros(X.shape[0]))
+        gpplot(X,phi,phi.flatten())
         pb.plot(X_obs,self.Y,'kx',mew=1.5)
+        if samples:
+            phi_samples = np.vstack([np.random.poisson(phi.flatten(),phi.size) for s in range(samples)])
+            pb.plot(X,phi_samples.T, alpha = 0.4, c='#3465a4', linewidth = 0.8)
         if Z is not None:
             pb.plot(Z,Z*0+pb.ylim()[0],'k|',mew=1.5,markersize=12)
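The Poisson counterpart plugs the scaled, shifted latent mean into the rate and reuses that rate as the variance, since a Poisson's variance equals its mean; as written it does not integrate over the latent variance. A hedged sketch with illustrative names:

    import numpy as np

    def poisson_predictive(mu, scale=1.0, location=0.0):
        rate = np.exp(mu * scale + location)   # predictive_mean above
        return rate, rate                      # Poisson variance equals its mean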

File 5 of 6

@@ -73,7 +73,6 @@ class GP(model):
         self.EP = False
         self.Y = Y
         self.beta = 100.#FIXME beta should be an explicit parameter for this model
-        # Here's some simple normalisation
         if normalize_Y:
             self._Ymean = Y.mean(0)[None,:]
@@ -88,8 +87,9 @@ class GP(model):
                 self.YYT = np.dot(self.Y, self.Y.T)
             else:
                 self.YYT = None
         else:
+            if self.D > 1:
+                raise NotImplementedError, "EP is not implemented for D > 1"
+            # Y is defined after approximating the likelihood
             self.EP = True
             self.eta,self.delta = power_ep
@@ -196,7 +196,6 @@ class GP(model):
         This is to allow for different normalisations of the output dimensions.
         """
-        #normalise X values
         Xnew = (Xnew.copy() - self._Xmean) / self._Xstd
         mu, var, phi = self._raw_predict(Xnew, slices, full_cov)
@@ -224,13 +223,18 @@ class GP(model):
         if full_cov:
             Kxx = self.kern.K(_Xnew, slices1=slices,slices2=slices)
             var = Kxx - np.dot(KiKx.T,Kx)
+            if self.EP:
+                raise NotImplementedError, "full_cov = True not implemented for EP"
+                #var = np.diag(var)[:,None]
+                #phi = self.likelihood.predictive_mean(mu,var)
         else:
             Kxx = self.kern.Kdiag(_Xnew, slices=slices)
             var = Kxx - np.sum(np.multiply(KiKx,Kx),0)
-        phi = None if not self.EP else self.likelihood.predictive_mean(mu,var)
+            if self.EP:
+                phi = self.likelihood.predictive_mean(mu,var)
         return mu, var, phi
 
-    def plot(self,samples=0,plot_limits=None,which_data='all',which_functions='all',resolution=None):
+    def plot(self,samples=0,plot_limits=None,which_data='all',which_functions='all',resolution=None,full_cov=False):
         """
         :param samples: the number of a posteriori samples to plot
         :param which_data: which if the training data to plot (default all)
@@ -268,27 +272,27 @@
         if self.X.shape[1]==1:
             Xnew = np.linspace(xmin,xmax,resolution or 200)[:,None]
-            m,v,phi = self.predict(Xnew,slices=which_functions)
+            m,v,phi = self.predict(Xnew,slices=which_functions,full_cov=full_cov)
             if self.EP:
                 pb.subplot(211)
             gpplot(Xnew,m,v)
             if samples: #NOTE why don't we put samples as a parameter of gpplot
-                s = np.random.multivariate_normal(m.flatten(),np.diag(v),samples)
+                s = np.random.multivariate_normal(m.flatten(),np.diag(v.flatten()),samples)
                 pb.plot(Xnew.flatten(),s.T, alpha = 0.4, c='#3465a4', linewidth = 0.8)
             pb.plot(Xorig,Yorig,'kx',mew=1.5)
             pb.xlim(xmin,xmax)
             if self.EP:
                 pb.subplot(212)
-                self.likelihood.plot(Xnew,phi,self.X)
+                self.likelihood.plot(Xnew,m,v,phi,self.X,samples=samples)
                 pb.xlim(xmin,xmax)
         elif self.X.shape[1]==2:
             resolution = 50 or resolution
             xx,yy = np.mgrid[xmin[0]:xmax[0]:1j*resolution,xmin[1]:xmax[1]:1j*resolution]
             Xtest = np.vstack((xx.flatten(),yy.flatten())).T
-            zz,vv,phi = self.predict(Xtest,slices=which_functions)
+            zz,vv,phi = self.predict(Xtest,slices=which_functions,full_cov=full_cov)
             zz = zz.reshape(resolution,resolution)
             pb.contour(xx,yy,zz,vmin=zz.min(),vmax=zz.max(),cmap=pb.cm.jet)
             pb.scatter(Xorig[:,0],Xorig[:,1],40,Yorig,linewidth=0,cmap=pb.cm.jet,vmin=zz.min(),vmax=zz.max())
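The sampling line above builds a diagonal covariance from the pointwise variances, so the plotted draws are independent across test inputs; threading full_cov through predict is what would allow jointly coherent draws of the function. A small sketch of that distinction (the helper name is illustrative, not part of GPy):

    import numpy as np

    def draw_posterior_samples(m, v, n_samples, full_cov=False):
        # v is a full covariance matrix when full_cov, else per-point variances
        cov = v if full_cov else np.diag(np.asarray(v).flatten())
        return np.random.multivariate_normal(m.flatten(), cov, n_samples)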

File 6 of 6

@@ -7,9 +7,10 @@ from ..util.linalg import mdot, jitchol, chol_inv, pdinv
 from ..util.plot import gpplot
 from .. import kern
 from GP import GP
-from ..inference.EP import Full
+from ..inference.EP import Full,DTC,FITC
 from ..inference.likelihoods import likelihood,probit,poisson,gaussian
 
 #Still TODO:
 # make use of slices properly (kernel can now do this)
 # enable heteroscedatic noise (kernel will need to compute psi2 as a (NxMxM) array)
@@ -35,10 +36,6 @@ class sparse_GP(GP):
     :type beta: float
     :param normalize_(X|Y) : whether to normalize the data before computing (predictions will be in original scales)
     :type normalize_(X|Y): bool
-    :parm likelihood: a GPy likelihood, defaults to gaussian
-    :param epsilon_ep: convergence criterion for the Expectation Propagation algorithm, defaults to 0.1
-    :param powerep: power-EP parameters [$\eta$,$\delta$], defaults to [1.,1.]
-    :type powerep: list
     """
     def __init__(self,X,Y=None,kernel=None,X_uncertainty=None,beta=100.,Z=None,Zslices=None,M=10,normalize_X=False,normalize_Y=False,likelihood=None,method_ep='DTC',epsilon_ep=1e-3,power_ep=[1.,1.]):
@@ -58,139 +55,31 @@ class sparse_GP(GP):
         self.X_uncertainty = X_uncertainty
         GP.__init__(self, X=X, Y=Y, kernel=kernel, normalize_X=normalize_X, normalize_Y=normalize_Y,likelihood=likelihood,epsilon_ep=epsilon_ep,power_ep=power_ep)
-        self.trYYT = np.sum(np.square(self.Y)) if not self.EP else None
-        #normalise X uncertainty also
-        if self.has_uncertain_inputs:
-            self.X_uncertainty /= np.square(self._Xstd)
+        if not self.EP:
+            self.trYYT = np.sum(np.square(self.Y))
+        else:
+            self.method_ep = method_ep
+        #normalise X uncertainty also
+        if self.has_uncertain_inputs:
+            self.X_uncertainty /= np.square(self._Xstd)
 
     def _set_params(self, p):
-        if not self.EP:
-            self.Z = p[:self.M*self.Q].reshape(self.M, self.Q)
-            self.beta = p[self.M*self.Q]
-            self.kern._set_params(p[self.Z.size + 1:])
-            self.beta2 = self.beta**2
-            self._compute_kernel_matrices()
-            self._computations()
-        else:
-            self.Z = p[:self.M*self.Q].reshape(self.M, self.Q)
-            self.kern._set_params(p[self.Z.size:])
-            #self._compute_kernel_matrices() this is replaced by _ep_kernel_matrices
-            self._ep_kernel_matrices()
-            self._ep_computations()
+        self.Z = p[:self.M*self.Q].reshape(self.M, self.Q)
+        if not self.EP:
+            self.beta = p[self.M*self.Q]
+            self.kern._set_params(p[self.Z.size + 1:])
+        else:
+            self.kern._set_params(p[self.Z.size:])
+            if self.Y is None:
+                self.Y = np.ones([self.N,1])
+        self._compute_kernel_matrices()
+        self._computations()
 
-    def _compute_kernel_matrices(self):
-        # kernel computations, using BGPLVM notation
-        #TODO: slices for psi statistics (easy enough)
-        self.Kmm = self.kern.K(self.Z)
-        if self.has_uncertain_inputs:
-            if self.hetero_noise:
-                raise NotImplementedError, "uncertain ips and het noise not yet supported"
-            else:
-                self.psi0 = self.kern.psi0(self.Z,self.X, self.X_uncertainty).sum()
-                self.psi1 = self.kern.psi1(self.Z,self.X, self.X_uncertainty).T
-                self.psi2 = self.kern.psi2(self.Z,self.X, self.X_uncertainty)
-        else:
-            if self.hetero_noise:
-                print "rick's stuff here"
-            else:
-                self.psi0 = self.kern.Kdiag(self.X,slices=self.Xslices).sum()
-                self.psi1 = self.kern.K(self.Z,self.X)
-                self.psi2 = np.dot(self.psi1,self.psi1.T)
-
-    def _computations(self):
-        # TODO find routine to multiply triangular matrices
-        self.V = self.beta*self.Y
-        self.psi1V = np.dot(self.psi1, self.V)
-        self.psi1VVpsi1 = np.dot(self.psi1V, self.psi1V.T)
-        self.Kmmi, self.Lm, self.Lmi, self.Kmm_logdet = pdinv(self.Kmm)
-        self.A = mdot(self.Lmi, self.beta*self.psi2, self.Lmi.T)
-        self.B = np.eye(self.M) + self.A
-        self.Bi, self.LB, self.LBi, self.B_logdet = pdinv(self.B)
-        self.LLambdai = np.dot(self.LBi, self.Lmi)
-        self.trace_K = self.psi0 - np.trace(self.A)/self.beta
-        self.LBL_inv = mdot(self.Lmi.T, self.Bi, self.Lmi)
-        self.C = mdot(self.LLambdai, self.psi1V)
-        self.G = mdot(self.LBL_inv, self.psi1VVpsi1, self.LBL_inv.T)
-        # Compute dL_dpsi
-        self.dL_dpsi0 = - 0.5 * self.D * self.beta * np.ones(self.N)
-        self.dL_dpsi1 = mdot(self.LLambdai.T,self.C,self.V.T)
-        self.dL_dpsi2 = - 0.5 * self.beta * (self.D*(self.LBL_inv - self.Kmmi) + self.G)
-        # Compute dL_dKmm
-        self.dL_dKmm = -0.5 * self.D * mdot(self.Lmi.T, self.A, self.Lmi) # dB
-        self.dL_dKmm += -0.5 * self.D * (- self.LBL_inv - 2.*self.beta*mdot(self.LBL_inv, self.psi2, self.Kmmi) + self.Kmmi) # dC
-        self.dL_dKmm += np.dot(np.dot(self.G,self.beta*self.psi2) - np.dot(self.LBL_inv, self.psi1VVpsi1), self.Kmmi) + 0.5*self.G # dE
-
-    def approximate_likelihood(self):
-        assert not isinstance(self.likelihood, gaussian), "EP is only available for non-gaussian likelihoods"
-        if self.ep_proxy == 'DTC':
-            self.ep_approx = DTC(self.Kmm,self.likelihood,self.psi1,epsilon=self.epsilon_ep,power_ep=[self.eta,self.delta])
-        elif self.ep_proxy == 'FITC':
-            self.Knn_diag = self.kern.psi0(self.Z,self.X, self.X_uncertainty) #TODO psi0 already calculates this
-            self.ep_approx = FITC(self.Kmm,self.likelihood,self.psi1,self.Knn_diag,epsilon=self.epsilon_ep,power_ep=[self.eta,self.delta])
-        else:
-            self.ep_approx = Full(self.X,self.likelihood,self.kernel,inducing=None,epsilon=self.epsilon_ep,power_ep=[self.eta,self.delta])
-        self.beta, self.v_tilde, self.Z_hat, self.tau_, self.v_=self.ep_approx.fit_EP()
-        self._ep_kernel_matrices()
-        self._computations()
-
-    def _ep_kernel_matrices(self):
-        self.Kmm = self.kern.K(self.Z)
-        if self.has_uncertain_inputs:
-            self.psi0 = self.kern.psi0(self.Z,self.X, self.X_uncertainty).sum()
-            self.psi1 = self.kern.psi1(self.Z,self.X, self.X_uncertainty).T
-            self.psi2 = self.kern.psi2(self.Z,self.X, self.X_uncertainty) #FIXME include beta
-        else:
-            self.psi0 = self.kern.Kdiag(self.X,slices=self.Xslices)
-            self.psi1 = self.kern.K(self.Z,self.X)
-            self.psi2 = np.dot(self.psi1,self.psi1.T)
-            self.psi2_beta_scaled = np.dot(self.psi1,self.beta*self.psi1.T)
-
-    def _ep_computations(self):
-        # Y: EP likelihood is defined as a regression model for mu_tilde
-        self.Y = self.v_tilde/self.beta
-        self._Ymean = np.zeros((1,self.Y.shape[1]))
-        self._Ystd = np.ones((1,self.Y.shape[1]))
-        self.trbetaYYT = np.sum(self.beta*np.square(self.Y))
-        if self.D > self.N:
-            # then it's more efficient to store YYT
-            self.YYT = np.dot(self.Y, self.Y.T)
-        else:
-            self.YYT = None
-        self.mu_ = self.v_/self.tau_
-        # TODO find routine to multiply triangular matrices
-        self.V = self.beta*self.Y
-        self.psi1V = np.dot(self.psi1, self.V)
-        self.psi1VVpsi1 = np.dot(self.psi1V, self.psi1V.T)
-        self.Kmmi, self.Lm, self.Lmi, self.Kmm_logdet = pdinv(self.Kmm)
-        #self.A = mdot(self.Lmi, self.beta*self.psi2, self.Lmi.T)
-        self.A = mdot(self.Lmi, self.psi2_beta_scaled, self.Lmi.T)
-        self.B = np.eye(self.M) + self.A
-        self.Bi, self.LB, self.LBi, self.B_logdet = pdinv(self.B)
-        self.LLambdai = np.dot(self.LBi, self.Lmi)
-        self.trace_K = self.psi0.sum() - np.trace(self.A)
-        self.LBL_inv = mdot(self.Lmi.T, self.Bi, self.Lmi)
-        self.C = mdot(self.LLambdai, self.psi1V)
-        self.G = mdot(self.LBL_inv, self.psi1VVpsi1, self.LBL_inv.T)
-        # Compute dL_dpsi
-        #self.dL_dpsi0 = - 0.5 * self.D * self.beta * np.ones(self.N)
-        self.dL_dpsi0 = - 0.5 * self.D * self.beta.flatten() * np.ones(self.N) #TODO check
-        self.dL_dpsi1 = mdot(self.LLambdai.T,self.C,self.V.T)
-        #self.dL_dpsi2 = - 0.5 * self.beta * (self.D*(self.LBL_inv - self.Kmmi) + self.G)
-        self.dL_dpsi2 = - 0.5 * self.beta * (self.D*(self.LBL_inv - self.Kmmi) + self.G)
-        # Compute dL_dKmm
-        self.dL_dKmm = -0.5 * self.D * mdot(self.Lmi.T, self.A, self.Lmi) # dB
-        self.dL_dKmm += -0.5 * self.D * (- self.LBL_inv - 2.*self.beta*mdot(self.LBL_inv, self.psi2, self.Kmmi) + self.Kmmi) # dC
-        self.dL_dKmm += np.dot(np.dot(self.G,self.beta*self.psi2) - np.dot(self.LBL_inv, self.psi1VVpsi1), self.Kmmi) + 0.5*self.G # dE
 
     def _get_params(self):
         if not self.EP:
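The refactor in this file hinges on psi2_beta_scaled = Psi1 * (beta * Psi1.T): with a scalar Gaussian precision this is just beta * Psi1 Psi1^T, while with the per-point site precisions returned by EP it becomes Psi1 diag(beta) Psi1^T, so a single code path can serve both cases. A small numpy illustration (shapes and values chosen arbitrarily):

    import numpy as np

    M, N = 5, 100
    psi1 = np.random.randn(M, N)               # stands in for K(Z, X)
    beta_scalar = 2.0                           # Gaussian noise precision
    beta_sites = np.random.rand(N, 1) + 0.1     # per-point EP precisions

    psi2_scalar = np.dot(psi1, beta_scalar * psi1.T)  # beta * Psi1 Psi1^T
    psi2_sites = np.dot(psi1, beta_sites * psi1.T)    # Psi1 diag(beta) Psi1^T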
@@ -204,19 +93,84 @@ class sparse_GP(GP):
         else:
             return sum([['iip_%i_%i'%(i,j) for i in range(self.Z.shape[0])] for j in range(self.Z.shape[1])],[]) + self.kern._get_param_names_transformed()
 
+    def _compute_kernel_matrices(self):
+        # kernel computations, using BGPLVM notation
+        #TODO: slices for psi statistics (easy enough)
+        self.Kmm = self.kern.K(self.Z)
+        if self.has_uncertain_inputs:
+            if not self.EP:
+                self.psi0 = self.kern.psi0(self.Z,self.X, self.X_uncertainty)#.sum() NOTE psi0 is now a vector
+                self.psi1 = self.kern.psi1(self.Z,self.X, self.X_uncertainty).T
+                self.psi2 = self.kern.psi2(self.Z,self.X, self.X_uncertainty)
+                #self.psi2_beta_scaled = ?
+            else:
+                raise NotImplementedError, "uncertain_inputs not yet supported for EP"
+        else:
+            self.psi0 = self.kern.Kdiag(self.X,slices=self.Xslices)#.sum()
+            self.psi1 = self.kern.K(self.Z,self.X)
+            self.psi2 = np.dot(self.psi1,self.psi1.T)
+            self.psi2_beta_scaled = np.dot(self.psi1,self.beta*self.psi1.T)
+
+    def _computations(self):
+        # TODO find routine to multiply triangular matrices
+        self.V = self.beta*self.Y
+        self.psi1V = np.dot(self.psi1, self.V)
+        self.psi1VVpsi1 = np.dot(self.psi1V, self.psi1V.T)
+        self.Kmmi, self.Lm, self.Lmi, self.Kmm_logdet = pdinv(self.Kmm)
+        self.A = mdot(self.Lmi, self.psi2_beta_scaled, self.Lmi.T)
+        self.B = np.eye(self.M) + self.A
+        self.Bi, self.LB, self.LBi, self.B_logdet = pdinv(self.B)
+        self.LLambdai = np.dot(self.LBi, self.Lmi)
+        self.LBL_inv = mdot(self.Lmi.T, self.Bi, self.Lmi)
+        self.C = mdot(self.LLambdai, self.psi1V)
+        self.G = mdot(self.LBL_inv, self.psi1VVpsi1, self.LBL_inv.T)
+        self.trace_K_beta_scaled = (self.psi0*self.beta).sum() - np.trace(self.A)
+        if not self.EP:
+            self.trace_K = self.psi0.sum() - np.trace(self.A)/self.beta
+        # Compute dL_dpsi
+        self.dL_dpsi1 = mdot(self.LLambdai.T,self.C,self.V.T)
+        if not self.EP:
+            self.dL_dpsi0 = - 0.5 * self.D * self.beta * np.ones(self.N)
+            if self.has_uncertain_inputs:
+                self.dL_dpsi2 = - 0.5 * self.beta * (self.D*(self.LBL_inv - self.Kmmi) + self.G)
+            else:
+                self.dL_dpsi2_ = - 0.5 * (self.D*(self.LBL_inv - self.Kmmi) + self.G)
+        else:
+            self.dL_dpsi0 = - 0.5 * self.D * self.beta.flatten()
+            if not self.has_uncertain_inputs:
+                self.dL_dpsi2_ = - 0.5 * (self.D*(self.LBL_inv - self.Kmmi) + self.G)
+        # Compute dL_dKmm
+        self.dL_dKmm = -0.5 * self.D * mdot(self.Lmi.T, self.A, self.Lmi) # dB
+        self.dL_dKmm += -0.5 * self.D * (- self.LBL_inv - 2.*mdot(self.LBL_inv, self.psi2_beta_scaled, self.Kmmi) + self.Kmmi) # dC
+        self.dL_dKmm += np.dot(np.dot(self.G,self.psi2_beta_scaled) - np.dot(self.LBL_inv, self.psi1VVpsi1), self.Kmmi) + 0.5*self.G # dE
+
+    def approximate_likelihood(self):
+        assert not isinstance(self.likelihood, gaussian), "EP is only available for non-gaussian likelihoods"
+        if self.method_ep == 'DTC':
+            self.ep_approx = DTC(self.Kmm,self.likelihood,self.psi1,epsilon=self.epsilon_ep,power_ep=[self.eta,self.delta])
+        elif self.method_ep == 'FITC':
+            self.ep_approx = FITC(self.Kmm,self.likelihood,self.psi1,self.psi0,epsilon=self.epsilon_ep,power_ep=[self.eta,self.delta])
+        else:
+            self.ep_approx = Full(self.X,self.likelihood,self.kernel,inducing=None,epsilon=self.epsilon_ep,power_ep=[self.eta,self.delta])
+        self.beta, self.Y, self.Z_ep = self.ep_approx.fit_EP()
+        self.trbetaYYT = np.sum(np.square(self.Y)*self.beta)
+        self._computations()
+
     def log_likelihood(self):
         """
         Compute the (lower bound on the) log marginal likelihood
         """
-        beta_logdet = self.N*self.D*np.log(self.beta) if not self.EP else self.D*np.sum(np.log(self.beta))
-        if self.hetero_noise:
-            A = foo
-            B = bar
-            D = -0.5*self.trbetaYYT
-        else:
-            A = -0.5*self.N*self.D*(np.log(2.*np.pi)) - 0.5*beta_logdet
-            B = -0.5*self.beta*self.D*self.trace_K if not self.EP else -0.5*self.D*self.trace_K
-            D = -0.5*self.beta*self.trYYT
+        if not self.EP:
+            A = -0.5*self.N*self.D*(np.log(2.*np.pi) - np.log(self.beta))
+            B = -0.5*self.beta*self.D*self.trace_K
+            D = -0.5*self.beta*self.trYYT
+        else:
+            A = -0.5*self.D*(self.N*np.log(2.*np.pi) - np.sum(np.log(self.beta)))
+            D = -0.5*self.trbetaYYT
+            B = -0.5*self.D*self.trace_K_beta_scaled
         C = -0.5*self.D * self.B_logdet
         E = +0.5*np.sum(self.psi1VVpsi1 * self.LBL_inv)
         return A+B+C+D+E
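For orientation, the terms assembled in the non-EP branch of log_likelihood above appear to correspond to the collapsed sparse-GP bound; the following transcription is inferred from the code (with Psi_1 = K(Z,X), Psi_2 = Psi_1 Psi_1^T, K_mm = L_m L_m^T and beta the scalar noise precision), not stated in the commit:

\[
\begin{aligned}
A &= -\tfrac{ND}{2}\bigl(\log 2\pi - \log\beta\bigr), \qquad
B = -\tfrac{\beta D}{2}\bigl(\operatorname{tr}(K_{nn}) - \operatorname{tr}(K_{mm}^{-1}\Psi_2)\bigr),\\
C &= -\tfrac{D}{2}\log\bigl\lvert I + \beta\,L_m^{-1}\Psi_2 L_m^{-\top}\bigr\rvert, \qquad
D = -\tfrac{\beta}{2}\operatorname{tr}(YY^{\top}),\\
E &= \tfrac{\beta^{2}}{2}\operatorname{tr}\bigl(Y^{\top}\Psi_1^{\top}(K_{mm}+\beta\Psi_2)^{-1}\Psi_1 Y\bigr),
\qquad \log p(Y) \ge A+B+C+D+E.
\end{aligned}
\]

In the EP branch, beta is the vector of site precisions, so log(beta) becomes a sum over sites and the beta-scaled traces (trace_K_beta_scaled, trbetaYYT) replace their scalar-beta counterparts.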
@@ -246,7 +200,7 @@ class sparse_GP(GP):
             dL_dtheta += self.kern.dpsi2_dtheta(self.dL_dpsi2,self.Z,self.X, self.X_uncertainty) # for multiple_beta, dL_dpsi2 will be a different shape
         else:
             #re-cast computations in psi2 back to psi1:
-            dL_dpsi1 = self.dL_dpsi1 + 2.*np.dot(self.dL_dpsi2,self.psi1)
+            dL_dpsi1 = self.dL_dpsi1 + 2.*np.dot(self.dL_dpsi2_,self.beta.T*self.psi1) #dL_dpsi1 = self.dL_dpsi1 + 2.*np.dot(self.dL_dpsi2,self.psi1)
             dL_dtheta += self.kern.dK_dtheta(dL_dpsi1,self.Z,self.X)
         dL_dtheta += self.kern.dKdiag_dtheta(self.dL_dpsi0, self.X)
@@ -262,32 +216,41 @@ class sparse_GP(GP):
             dL_dZ += self.kern.dpsi2_dZ(self.dL_dpsi2,self.Z,self.X, self.X_uncertainty)
         else:
             #re-cast computations in psi2 back to psi1:
-            dL_dpsi1 = self.dL_dpsi1 + 2.*np.dot(self.dL_dpsi2,self.psi1)
+            dL_dpsi1 = self.dL_dpsi1 + 2.*np.dot(self.dL_dpsi2_,self.beta.T*self.psi1) #dL_dpsi1 = self.dL_dpsi1 + 2.*np.dot(self.dL_dpsi2,self.psi1)
             dL_dZ += self.kern.dK_dX(dL_dpsi1,self.Z,self.X)
         return dL_dZ
 
     def _log_likelihood_gradients(self):
-        return np.hstack([self.dL_dZ().flatten(), self.dL_dbeta(), self.dL_dtheta()])
+        if not self.EP:
+            return np.hstack([self.dL_dZ().flatten(), self.dL_dbeta(), self.dL_dtheta()])
+        else:
+            return np.hstack([self.dL_dZ().flatten(), self.dL_dtheta()])
 
     def _raw_predict(self, Xnew, slices, full_cov=False):
         """Internal helper function for making predictions, does not account for normalisation"""
         Kx = self.kern.K(self.Z, Xnew)
         mu = mdot(Kx.T, self.LBL_inv, self.psi1V)
+        phi = None
         if full_cov:
-            noise_term = np.eye(Xnew.shape[0])/self.beta if not self.EP else 0
             Kxx = self.kern.K(Xnew)
-            var = Kxx - mdot(Kx.T, (self.Kmmi - self.LBL_inv), Kx) + noise_term
+            var = Kxx - mdot(Kx.T, (self.Kmmi - self.LBL_inv), Kx)
+            if not self.EP:
+                var += np.eye(Xnew.shape[0])/self.beta
+            else:
+                raise NotImplementedError, "full_cov = True not implemented for EP"
         else:
-            noise_term = 1./self.beta if not self.EP else 0
             Kxx = self.kern.Kdiag(Xnew)
-            var = Kxx - np.sum(Kx*np.dot(self.Kmmi - self.LBL_inv, Kx),0) + noise_term
-        return mu,var,None#TODO add phi for EP
+            var = Kxx - np.sum(Kx*np.dot(self.Kmmi - self.LBL_inv, Kx),0)
+            if not self.EP:
+                var += 1./self.beta
+            else:
+                phi = self.likelihood.predictive_mean(mu,var)
+        return mu,var,phi
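In the same notation as above, _raw_predict appears to evaluate the following (reading directly from the code; K_{*m} = K(X_*, Z), and beta enters elementwise when it is the EP site-precision vector):

\[
\begin{aligned}
\mu_* &= K_{*m}\,(K_{mm}+\beta\Psi_2)^{-1}\,\Psi_1\,\beta Y,\\
\operatorname{var}_* &= K_{**} - K_{*m}\bigl(K_{mm}^{-1} - (K_{mm}+\beta\Psi_2)^{-1}\bigr)K_{m*}
\;\;\bigl[\,+\,\beta^{-1}\ \text{only in the Gaussian branch}\,\bigr].
\end{aligned}
\]

Under EP the likelihood's predictive_mean is then applied to (mu_*, var_*) to produce phi.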
     def plot(self, *args, **kwargs):
         """
         Plot the fitted model: just call the GP_regression plot function and then add inducing inputs
         """
-        #GP_regression.plot(self,*args,**kwargs)
         GP.plot(self,*args,**kwargs)
         if self.Q==1:
             pb.plot(self.Z,self.Z*0+pb.ylim()[0],'k|',mew=1.5,markersize=12)