Merge branch 'newGP' of github.com:SheffieldML/GPy into newGP

Conflicts:
	GPy/likelihoods/EP.py
	GPy/likelihoods/likelihood_functions.py
Ricardo Andrade 2013-02-01 13:19:59 +00:00
commit 879fa138e1
7 changed files with 464 additions and 369 deletions


@@ -1,11 +1,9 @@
import numpy as np
import random
from scipy import stats, linalg
#from ..core import model
from ..util.linalg import pdinv,mdot,jitchol
from ..util.plot import gpplot
from likelihood import likelihood
class EP:
class EP(likelihood):
def __init__(self,data,likelihood_function,epsilon=1e-3,power_ep=[1.,1.]):
"""
Expectation Propagation
@@ -20,11 +18,10 @@ class EP:
self.eta, self.delta = power_ep
self.data = data
self.N = self.data.size
self.is_heteroscedastic = True
"""
Initial values - Likelihood approximation parameters:
p(y|f) = t(f|tau_tilde,v_tilde)
"""
#Initial values - Likelihood approximation parameters:
#p(y|f) = t(f|tau_tilde,v_tilde)
self.tau_tilde = np.zeros(self.N)
self.v_tilde = np.zeros(self.N)
@@ -51,9 +48,11 @@ class EP:
mu_tilde = self.v_tilde/self.tau_tilde #When calling EP, this variable is used instead of Y in the GP model
sigma_sum = 1./self.tau_ + 1./self.tau_tilde
mu_diff_2 = (self.v_/self.tau_ - mu_tilde)**2
Z_ep = np.sum(np.log(self.Z_hat)) + 0.5*np.sum(np.log(sigma_sum)) + 0.5*np.sum(mu_diff_2/sigma_sum) #Normalization constant
self.Y, self.beta, self.Z = mu_tilde[:,None],self.tau_tilde[:,None], Z_ep
self.variance = np.diag(1./self.beta.flatten())
self.Z = np.sum(np.log(self.Z_hat)) + 0.5*np.sum(np.log(sigma_sum)) + 0.5*np.sum(mu_diff_2/sigma_sum) #Normalization constant, aka Z_ep
self.Y = mu_tilde[:,None]
self.precision = self.tau_tilde[:,None]
self.covariance_matrix = np.diag(1./self.precision)
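For reference (an editorial transcription, not part of the diff), the normalization term assembled above reads, writing mu_tilde as \tilde{\mu}_i = \tilde{v}_i/\tilde{\tau}_i and the cavity parameters tau_, v_ as \tau_{-i}, v_{-i}:
$$
Z_{EP} = \sum_i \log \hat{Z}_i + \frac{1}{2}\sum_i \log\left(\frac{1}{\tau_{-i}} + \frac{1}{\tilde{\tau}_i}\right) + \frac{1}{2}\sum_i \frac{\left(v_{-i}/\tau_{-i} - \tilde{\mu}_i\right)^2}{1/\tau_{-i} + 1/\tilde{\tau}_i}
$$
The three sums match sigma_sum and mu_diff_2 computed above; the result stored in self.Z is the factor the base likelihood class documents as being added to the GP log marginal likelihood.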
def fit_full(self,K):
"""


@@ -1,7 +1,9 @@
import numpy as np
from likelihood import likelihood
class Gaussian:
class Gaussian(likelihood):
def __init__(self,data,variance=1.,normalize=False):
self.is_heteroscedastic = False
self.data = data
self.N,D = data.shape
self.Z = 0. # a correction factor which accounts for the approximation made
@@ -19,6 +21,7 @@ class Gaussian:
self.YYT = np.dot(self.Y,self.Y.T)
self._set_params(np.asarray(variance))
def _get_params(self):
return np.asarray(self._variance)
@@ -27,7 +30,8 @@ class Gaussian:
def _set_params(self,x):
self._variance = x
self.variance = np.eye(self.N)*self._variance
self.covariance_matrix = np.eye(self.N)*self._variance
self.precision = 1./self._variance
def predictive_values(self,mu,var):
"""


@@ -0,0 +1,35 @@
import numpy as np
class likelihood:
"""
The atom for a likelihood class
This object interfaces the GP and the data. The most basic likelihood
(Gaussian) inherits directly from this, as does the EP algorithm.
Some things must be defined for this to work properly:
self.Y : the effective Gaussian target of the GP
self.N, self.D : Y.shape
self.covariance_matrix : the effective (noise) covariance of the GP targets
self.Z : a factor which gets added to the likelihood (0 for a Gaussian, Z_EP for EP)
self.is_heteroscedastic : enables significant computational savings in GP
self.precision : a scalar or vector representation of the effective target precision
self.YYT : (optional) = np.dot(self.Y, self.Y.T) enables computational savings for D>N
"""
def __init__(self,data):
raise ValueError, "this class is not to be instantiated"
def _get_params(self):
raise NotImplementedError
def _get_param_names(self):
raise NotImplementedError
def _set_params(self,x):
raise NotImplementedError
def fit(self):
raise NotImplementedError
def _gradients(self,partial):
raise NotImplementedError
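To make the interface above concrete, here is a minimal illustrative subclass (not part of this commit; the class name, the noise_variance argument and the constant values are invented for the example, and it assumes the likelihood base class above is importable) that fills in every attribute the docstring lists:
import numpy as np

class fixed_gaussian(likelihood):
    """Toy likelihood with a fixed, known noise variance (illustration only)."""
    def __init__(self, data, noise_variance=1.):
        self.data = data
        self.Y = data                                   # the data are already Gaussian targets
        self.N, self.D = data.shape
        self.is_heteroscedastic = False
        self.Z = 0.                                     # no correction factor for an exact Gaussian likelihood
        self.precision = 1./noise_variance
        self.covariance_matrix = np.eye(self.N)*noise_variance
        self.YYT = np.dot(self.Y, self.Y.T)             # optional, useful when D > N
    def _get_params(self):
        return np.empty(0)                              # nothing to optimize in this toy example
    def _get_param_names(self):
        return []
    def _set_params(self, x):
        pass
    def fit(self):
        pass                                            # nothing to fit: the likelihood is exact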


@@ -9,18 +9,22 @@ import pylab as pb
from ..util.plot import gpplot
#from . import EP
class likelihood:
class likelihood_function:
"""
Likelihood class for doing Expectation propagation
:param Y: observed output (Nx1 numpy.darray)
..Note:: Y values allowed depend on the likelihood used
..Note:: Y values allowed depend on the likelihood_function used
"""
def __init__(self,location=0,scale=1):
self.location = location
self.scale = scale
<<<<<<< HEAD
class Probit(likelihood):
=======
class probit(likelihood_function):
>>>>>>> 346f9dd8bd3207959b87ded258e55aeb094f1ea3
"""
Probit likelihood
Y is expected to take values in {-1,1}
@@ -29,6 +33,11 @@ class Probit(likelihood):
L(x) = \\Phi (Y_i*f_i)
$$
"""
<<<<<<< HEAD
=======
def __init__(self,location=0,scale=1):
likelihood_function.__init__(self,Y,location,scale)
>>>>>>> 346f9dd8bd3207959b87ded258e55aeb094f1ea3
def moments_match(self,data_i,tau_i,v_i):
"""
@@ -57,7 +66,11 @@ class Probit(likelihood):
p_95 = np.ones([mu.size])
return mean, p_05, p_95
<<<<<<< HEAD
class Poisson(likelihood):
=======
class poisson(likelihood_function):
>>>>>>> 346f9dd8bd3207959b87ded258e55aeb094f1ea3
"""
Poisson likelihood
Y is expected to take values in {0,1,2,...}
@@ -66,6 +79,12 @@ class Poisson(likelihood):
L(x) = \exp(-\lambda) * \lambda**Y_i / Y_i!
$$
"""
<<<<<<< HEAD
=======
def __init__(self,Y,location=0,scale=1):
assert len(Y[Y<0]) == 0, "Output cannot have negative values"
likelihood_function.__init__(self,Y,location,scale)
>>>>>>> 346f9dd8bd3207959b87ded258e55aeb094f1ea3
def moments_match(self,i,tau_i,v_i):
"""
@@ -129,7 +148,54 @@ class Poisson(likelihood):
Compute mean and confidence interval (5th and 95th percentiles) of the prediction
"""
mean = np.exp(mu*self.scale + self.location)
<<<<<<< HEAD
tmp = stats.poisson.ppf(np.array([.05,.95]),mu)
p_05 = tmp[:,0]
p_95 = tmp[:,1]
return mean,p_05,p_95
=======
if all:
tmp = stats.poisson.ppf(np.array([.05,.95]),mu)
p_05 = tmp[:,0]
p_95 = tmp[:,1]
return mean,mean,p_05,p_95
else:
return mean
def _log_likelihood_gradients():
raise NotImplementedError
def plot(self,X,mu,var,phi,X_obs,Z=None,samples=0):
assert X_obs.shape[1] == 1, 'Number of dimensions must be 1'
gpplot(X,phi,phi.flatten())
pb.plot(X_obs,self.Y,'kx',mew=1.5)
if samples:
phi_samples = np.vstack([np.random.poisson(phi.flatten(),phi.size) for s in range(samples)])
pb.plot(X,phi_samples.T, alpha = 0.4, c='#3465a4', linewidth = 0.8)
if Z is not None:
pb.plot(Z,Z*0+pb.ylim()[0],'k|',mew=1.5,markersize=12)
class gaussian(likelihood_function):
"""
Gaussian likelihood
Y is expected to take values in (-inf,inf)
"""
def moments_match(self,i,tau_i,v_i):
"""
Moments match of the marginal approximation in EP algorithm
:param i: number of observation (int)
:param tau_i: precision of the cavity distribution (float)
:param v_i: mean/variance of the cavity distribution (float)
"""
mu = v_i/tau_i
sigma = np.sqrt(1./tau_i)
s = 1. if self.Y[i] == 0 else 1./self.Y[i]
sigma2_hat = 1./(1./sigma**2 + 1./s**2)
mu_hat = sigma2_hat*(mu/sigma**2 + self.Y[i]/s**2)
Z_hat = 1./np.sqrt(2*np.pi) * 1./np.sqrt(sigma**2+s**2) * np.exp(-.5*(mu-self.Y[i])**2/(sigma**2 + s**2))
return Z_hat, mu_hat, sigma2_hat
def _log_likelihood_gradients():
raise NotImplementedError
>>>>>>> 346f9dd8bd3207959b87ded258e55aeb094f1ea3
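As an editorial sanity check of the gaussian moment matching above (not part of the commit; the numbers are arbitrary): matching the cavity N(f; v_i/tau_i, 1/tau_i) against the site N(Y_i; f, s^2) is a product of two Gaussians, so the same mu_hat and sigma2_hat follow from simply adding precisions.
import numpy as np

tau_i, v_i, y = 2.0, 1.0, 0.7                      # arbitrary cavity parameters and observation
mu, sigma2 = v_i/tau_i, 1./tau_i
s2 = (1. if y == 0 else 1./y)**2                   # same scaling of s as in moments_match above

# formulas exactly as written in gaussian.moments_match
sigma2_hat = 1./(1./sigma2 + 1./s2)
mu_hat = sigma2_hat*(mu/sigma2 + y/s2)
Z_hat = np.exp(-.5*(mu - y)**2/(sigma2 + s2))/np.sqrt(2.*np.pi*(sigma2 + s2))  # site normalizer

# equivalent precision form of the same product of Gaussians
assert np.isclose(sigma2_hat, 1./(tau_i + 1./s2))
assert np.isclose(mu_hat, (v_i + y/s2)/(tau_i + 1./s2))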


@@ -6,37 +6,36 @@ import pylab as pb
from ..util.linalg import mdot, jitchol, chol_inv, pdinv
from ..util.plot import gpplot
from .. import kern
from ..inference.likelihoods import likelihood
from GP import GP
#Still TODO:
# make use of slices properly (kernel can now do this)
# enable heteroscedastic noise (kernel will need to compute psi2 as a (NxMxM) array)
class sparse_GP(GP):
"""
Variational sparse GP model (Regression)
Variational sparse GP model
:param X: inputs
:type X: np.ndarray (N x Q)
:param Y: observed data
:type Y: np.ndarray of observations (N x D)
:param likelihood: a likelihood instance, containing the observed data
:type likelihood: GPy.likelihood.(Gaussian | EP)
:param kernel : the kernel/covariance function. See link kernels
:type kernel: a GPy kernel
:param Z: inducing inputs (optional, see note)
:type Z: np.ndarray (M x Q) | None
:param X_uncertainty: The uncertainty in the measurements of X (Gaussian variance)
:type X_uncertainty: np.ndarray (N x Q) | None
:param Z: inducing inputs (optional, see note)
:type Z: np.ndarray (M x Q) | None
:param Zslices: slices for the inducing inputs (see slicing TODO: link)
:param M : Number of inducing points (optional, default 10. Ignored if Z is not None)
:type M: int
:param beta: noise precision. TODO> ignore beta if doing EP
:type beta: float
:param normalize_(X|Y) : whether to normalize the data before computing (predictions will be in original scales)
:type normalize_(X|Y): bool
"""
def __init__(self,X,Y=None,kernel=None,X_uncertainty=None,beta=100.,Z=None,Zslices=None,M=10,normalize_X=False,normalize_Y=False,likelihood=None,method_ep='DTC',epsilon_ep=1e-3,power_ep=[1.,1.]):
def __init__(self,X,likelihood,kernel, X_uncertainty=None, Z=None,Zslices=None,M=10,normalize_X=False):
self.scale_factor = 1000.0# a scaling factor to help keep the algorithm stable
if Z is None:
self.Z = np.random.permutation(X.copy())[:M]
@@ -52,140 +51,91 @@ class sparse_GP(GP):
self.has_uncertain_inputs=True
self.X_uncertainty = X_uncertainty
GP.__init__(self, X=X, Y=Y, kernel=kernel, normalize_X=normalize_X, normalize_Y=normalize_Y,likelihood=likelihood,epsilon_ep=epsilon_ep,power_ep=power_ep)
GP.__init__(self, X, Y, kernel=kernel, normalize_X=normalize_X, Xslices=Xslices)
#normalise X uncertainty also
if self.has_uncertain_inputs:
self.X_uncertainty /= np.square(self._Xstd)
if not self.EP:
self.trYYT = np.sum(np.square(self.Y))
else:
self.method_ep = method_ep
#normalise X uncertainty also
if self.has_uncertain_inputs:
self.X_uncertainty /= np.square(self._Xstd)
def _set_params(self, p):
self.Z = p[:self.M*self.Q].reshape(self.M, self.Q)
if not self.EP:
self.beta = p[self.M*self.Q]
self.kern._set_params(p[self.Z.size + 1:])
else:
self.kern._set_params(p[self.Z.size:])
if self.Y is None:
self.Y = np.ones([self.N,1])
self._compute_kernel_matrices()
self._computations()
def _get_params(self):
if not self.EP:
return np.hstack([self.Z.flatten(),self.beta,self.kern._get_params_transformed()])
else:
return np.hstack([self.Z.flatten(),self.kern._get_params_transformed()])
def _get_param_names(self):
if not self.EP:
return sum([['iip_%i_%i'%(i,j) for i in range(self.Z.shape[0])] for j in range(self.Z.shape[1])],[]) + ['noise_precision']+self.kern._get_param_names_transformed()
else:
return sum([['iip_%i_%i'%(i,j) for i in range(self.Z.shape[0])] for j in range(self.Z.shape[1])],[]) + self.kern._get_param_names_transformed()
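The _get_params / _set_params pair above packs everything into one flat vector: flattened inducing inputs first, then (when not doing EP) the noise precision, then the transformed kernel parameters. An editorial round-trip illustration with made-up sizes (not part of the diff):
import numpy as np

M, Q = 3, 2
Z = np.random.randn(M, Q)                          # inducing inputs
beta = 100.                                        # noise precision (non-EP case)
kern_params = np.array([1.0, 0.5])                 # e.g. transformed variance and lengthscale

p = np.hstack([Z.flatten(), beta, kern_params])    # layout produced by _get_params
Z_back = p[:M*Q].reshape(M, Q)                     # unpacking mirrors _set_params
beta_back = p[M*Q]
kern_back = p[M*Q + 1:]
assert np.allclose(Z_back, Z) and beta_back == beta and np.allclose(kern_back, kern_params)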
def _compute_kernel_matrices(self):
# kernel computations, using BGPLVM notation
#TODO: slices for psi statistics (easy enough)
self.Kmm = self.kern.K(self.Z)
if self.has_uncertain_inputs:
if not self.EP:
self.psi0 = self.kern.psi0(self.Z,self.X, self.X_uncertainty)#.sum() NOTE psi0 is now a vector
self.psi1 = self.kern.psi1(self.Z,self.X, self.X_uncertainty).T
self.psi2 = self.kern.psi2(self.Z,self.X, self.X_uncertainty)
#self.psi2_beta_scaled = ?
else:
raise NotImplementedError, "uncertain_inputs not yet supported for EP"
else:
self.psi0 = self.kern.Kdiag(self.X,slices=self.Xslices)#.sum()
self.psi1 = self.kern.K(self.Z,self.X)
self.psi2 = np.dot(self.psi1,self.psi1.T)
self.psi2_beta_scaled = np.dot(self.psi1,self.beta*self.psi1.T)
def _computations(self):
# TODO find routine to multiply triangular matrices
self.V = self.beta*self.Y
#TODO: slices for psi statistics (easy enough)
sf = self.scale_factor
sf2 = sf**2
# kernel computations, using BGPLVM notation
self.Kmm = self.kern.K(self.Z)
if self.has_uncertain_inputs:
self.psi0 = self.kern.psi0(self.Z,self.X, self.X_uncertainty).sum()
self.psi1 = self.kern.psi1(self.Z,self.X, self.X_uncertainty).T
self.psi2 = self.kern.psi2(self.Z,self.X, self.X_uncertainty)
self.psi2_beta_scaled = (self.psi2*(self.beta/sf2)).sum(0)
else:
self.psi0 = self.kern.Kdiag(self.X,slices=self.Xslices).sum()
self.psi1 = self.kern.K(self.Z,self.X)
tmp = self.psi1*(np.sqrt(self.likelihood.beta)/sf)
self.psi2_beta_scaled = np.dot(tmp,tmp.T)
self.Kmmi, self.Lm, self.Lmi, self.Kmm_logdet = pdinv(self.Kmm)#+np.eye(self.M)*1e-3)
self.V = (self.likelihood.beta/self.scale_factor)*self.Y
self.A = mdot(self.Lmi, self.psi2_beta_scaled, self.Lmi.T)
self.B = np.eye(self.M)/sf2 + self.A
self.Bi, self.LB, self.LBi, self.B_logdet = pdinv(self.B)
self.psi1V = np.dot(self.psi1, self.V)
self.psi1VVpsi1 = np.dot(self.psi1V, self.psi1V.T)
self.Kmmi, self.Lm, self.Lmi, self.Kmm_logdet = pdinv(self.Kmm)
self.A = mdot(self.Lmi, self.psi2_beta_scaled, self.Lmi.T)
self.B = np.eye(self.M) + self.A
self.Bi, self.LB, self.LBi, self.B_logdet = pdinv(self.B)
self.LLambdai = np.dot(self.LBi, self.Lmi)
self.LBL_inv = mdot(self.Lmi.T, self.Bi, self.Lmi)
self.C = mdot(self.LLambdai, self.psi1V)
self.G = mdot(self.LBL_inv, self.psi1VVpsi1, self.LBL_inv.T)
self.trace_K_beta_scaled = (self.psi0*self.beta).sum() - np.trace(self.A)
if not self.EP:
self.trace_K = self.psi0.sum() - np.trace(self.A)/self.beta
self.C = mdot(self.Lmi.T, self.Bi, self.Lmi)
self.E = mdot(self.C, self.psi1VVpsi1/sf2, self.C.T)
# Compute dL_dpsi
self.dL_dpsi1 = mdot(self.LLambdai.T,self.C,self.V.T)
if not self.EP:
self.dL_dpsi0 = - 0.5 * self.D * self.beta * np.ones(self.N)
if self.has_uncertain_inputs:
self.dL_dpsi2 = - 0.5 * self.beta * (self.D*(self.LBL_inv - self.Kmmi) + self.G)
else:
self.dL_dpsi2_ = - 0.5 * (self.D*(self.LBL_inv - self.Kmmi) + self.G)
else:
self.dL_dpsi0 = - 0.5 * self.D * self.beta.flatten()
if not self.has_uncertain_inputs:
self.dL_dpsi2_ = - 0.5 * (self.D*(self.LBL_inv - self.Kmmi) + self.G)
# Compute dL_dpsi # FIXME
self.dL_dpsi0 = - 0.5 * self.D * self.beta * np.ones(self.N)
self.dL_dpsi1 = mdot(self.V, self.psi1V.T,self.C).T
self.dL_dpsi2 = 0.5 * self.beta * self.D * self.Kmmi[None,:,:] # dB
self.dL_dpsi2 += - 0.5 * self.beta/sf2 * self.D * self.C[None,:,:] # dC
self.dL_dpsi2 += - 0.5 * self.beta * self.E[None,:,:] # dD
# Compute dL_dKmm
self.dL_dKmm = -0.5 * self.D * mdot(self.Lmi.T, self.A, self.Lmi) # dB
self.dL_dKmm += -0.5 * self.D * (- self.LBL_inv - 2.*mdot(self.LBL_inv, self.psi2_beta_scaled, self.Kmmi) + self.Kmmi) # dC
self.dL_dKmm += np.dot(np.dot(self.G,self.psi2_beta_scaled) - np.dot(self.LBL_inv, self.psi1VVpsi1), self.Kmmi) + 0.5*self.G # dE
self.dL_dKmm = -0.5 * self.D * mdot(self.Lmi.T, self.A, self.Lmi)*sf2 # dB
self.dL_dKmm += -0.5 * self.D * (- self.C/sf2 - 2.*mdot(self.C, self.psi2_beta_scaled, self.Kmmi) + self.Kmmi) # dC
self.dL_dKmm += np.dot(np.dot(self.E*sf2, self.psi2_beta_scaled) - np.dot(self.C, self.psi1VVpsi1), self.Kmmi) + 0.5*self.E # dD
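A hedged reading of the algebra in _computations (editorial, not part of the diff): in the branch without uncertain inputs psi2_beta_scaled is beta*Psi1*Psi1.T/sf^2, and with the Cholesky factorisation K_mm = L_m L_m^T the matrices A, B and C above satisfy
$$
C = L_m^{-\top} B^{-1} L_m^{-1} = sf^2\left(K_{mm} + \beta\,\Psi_1\Psi_1^{\top}\right)^{-1}, \qquad \frac{C}{sf^2} = \left(K_{mm} + \beta\,\Psi_1\Psi_1^{\top}\right)^{-1},
$$
i.e. C/sf^2 is the usual projected-process middle matrix; the scale factor sf only rescales the computation for numerical stability and cancels again in the predictions below.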
def approximate_likelihood(self):
assert not isinstance(self.likelihood, gaussian), "EP is only available for non-gaussian likelihoods"
if self.method_ep == 'DTC':
self.ep_approx = DTC(self.Kmm,self.likelihood,self.psi1,epsilon=self.epsilon_ep,power_ep=[self.eta,self.delta])
elif self.method_ep == 'FITC':
self.ep_approx = FITC(self.Kmm,self.likelihood,self.psi1,self.psi0,epsilon=self.epsilon_ep,power_ep=[self.eta,self.delta])
else:
self.ep_approx = Full(self.X,self.likelihood,self.kernel,inducing=None,epsilon=self.epsilon_ep,power_ep=[self.eta,self.delta])
self.beta, self.Y, self.Z_ep = self.ep_approx.fit_EP()
self.trbetaYYT = np.sum(np.square(self.Y)*self.beta)
def _set_params(self, p):
self.Z = p[:self.M*self.Q].reshape(self.M, self.Q)
self.beta = p[self.M*self.Q] # FIXME
self.kern._set_params(p[self.Z.size + 1:])
self._computations()
def _get_params(self):
return np.hstack([self.Z.flatten(),GP._get_params(self)])
def _get_param_names(self):
return sum([['iip_%i_%i'%(i,j) for i in range(self.Z.shape[0])] for j in range(self.Z.shape[1])],[]) + GP._get_param_names(self)
def log_likelihood(self):
"""
Compute the (lower bound on the) log marginal likelihood
"""
if not self.EP:
A = -0.5*self.N*self.D*(np.log(2.*np.pi) - np.log(self.beta))
D = -0.5*self.beta*self.trYYT
else:
A = -0.5*self.D*(self.N*np.log(2.*np.pi) - np.sum(np.log(self.beta)))
D = -0.5*self.trbetaYYT
B = -0.5*self.D*self.trace_K_beta_scaled
C = -0.5*self.D * self.B_logdet
E = +0.5*np.sum(self.psi1VVpsi1 * self.LBL_inv)
return A+B+C+D+E
""" Compute the (lower bound on the) log marginal likelihood """
sf2 = self.scale_factor**2
A = -0.5*self.N*self.D*(np.log(2.*np.pi) - np.log(self.beta)) -0.5*self.beta*self.trYYT # FIXME
B = -0.5*self.D*(self.beta*self.psi0-np.trace(self.A)*sf2)# FIXME
C = -0.5*self.D * (self.B_logdet + self.M*np.log(sf2))
D = +0.5*np.sum(self.psi1VVpsi1 * self.C)
return A+B+C+D
def _log_likelihood_gradients(self):
return np.hstack([self.dL_dZ().flatten(), GP._log_likelihood_gradients(self)])
# FIXME: move this into the likelihood class
def dL_dbeta(self):
"""
Compute the gradient of the log likelihood wrt beta.
"""
#TODO: support heteroscedastic noise
dA_dbeta = 0.5 * self.N*self.D/self.beta
dB_dbeta = - 0.5 * self.D * self.trace_K
sf2 = self.scale_factor**2
dA_dbeta = 0.5 * self.N*self.D/self.beta - 0.5 * self.trYYT
dB_dbeta = - 0.5 * self.D * (self.psi0 - np.trace(self.A)/self.beta*sf2)
dC_dbeta = - 0.5 * self.D * np.sum(self.Bi*self.A)/self.beta
dD_dbeta = - 0.5 * self.trYYT
tmp = mdot(self.LBi.T, self.LLambdai, self.psi1V)
dE_dbeta = (np.sum(np.square(self.C)) - 0.5 * np.sum(self.A * np.dot(tmp, tmp.T)))/self.beta
dD_dbeta = np.sum((self.C - 0.5 * mdot(self.C,self.psi2_beta_scaled,self.C) ) * self.psi1VVpsi1 )/self.beta
return np.squeeze(dA_dbeta + dB_dbeta + dC_dbeta + dD_dbeta + dE_dbeta)
return np.squeeze(dA_dbeta + dB_dbeta + dC_dbeta + dD_dbeta)
def dL_dtheta(self):
"""
@@ -195,10 +145,10 @@ class sparse_GP(GP):
if self.has_uncertain_inputs:
dL_dtheta += self.kern.dpsi0_dtheta(self.dL_dpsi0, self.Z,self.X,self.X_uncertainty)
dL_dtheta += self.kern.dpsi1_dtheta(self.dL_dpsi1.T,self.Z,self.X, self.X_uncertainty)
dL_dtheta += self.kern.dpsi2_dtheta(self.dL_dpsi2,self.Z,self.X, self.X_uncertainty) # for multiple_beta, dL_dpsi2 will be a different shape
dL_dtheta += self.kern.dpsi2_dtheta(self.dL_dpsi2,self.dL_dpsi1.T, self.Z,self.X, self.X_uncertainty)
else:
#re-cast computations in psi2 back to psi1:
dL_dpsi1 = self.dL_dpsi1 + 2.*np.dot(self.dL_dpsi2_,self.beta.T*self.psi1) #dL_dpsi1 = self.dL_dpsi1 + 2.*np.dot(self.dL_dpsi2,self.psi1)
dL_dpsi1 = self.dL_dpsi1 + 2.*np.dot(self.dL_dpsi2.sum(0),self.psi1)
dL_dtheta += self.kern.dK_dtheta(dL_dpsi1,self.Z,self.X)
dL_dtheta += self.kern.dKdiag_dtheta(self.dL_dpsi0, self.X)
@@ -208,48 +158,36 @@ class sparse_GP(GP):
"""
The derivative of the bound wrt the inducing inputs Z
"""
dL_dZ = 2.*self.kern.dK_dX(self.dL_dKmm,self.Z,)#factor of two because of vertical and horizontal 'stripes' in dKmm_dZ
dL_dZ = 2.*self.kern.dK_dX(self.dL_dKmm,self.Z)#factor of two because of vertical and horizontal 'stripes' in dKmm_dZ
if self.has_uncertain_inputs:
dL_dZ += self.kern.dpsi1_dZ(self.dL_dpsi1.T,self.Z,self.X, self.X_uncertainty)
dL_dZ += self.kern.dpsi2_dZ(self.dL_dpsi2,self.Z,self.X, self.X_uncertainty)
dL_dZ += self.kern.dpsi1_dZ(self.dL_dpsi1,self.Z,self.X, self.X_uncertainty)
dL_dZ += 2.*self.kern.dpsi2_dZ(self.dL_dpsi2,self.Z,self.X, self.X_uncertainty) # 'stripes'
else:
#re-cast computations in psi2 back to psi1:
dL_dpsi1 = self.dL_dpsi1 + 2.*np.dot(self.dL_dpsi2_,self.beta.T*self.psi1)#dL_dpsi1 = self.dL_dpsi1 + 2.*np.dot(self.dL_dpsi2,self.psi1)
dL_dpsi1 = self.dL_dpsi1 + 2.*np.dot(self.dL_dpsi2.sum(0),self.psi1)
dL_dZ += self.kern.dK_dX(dL_dpsi1,self.Z,self.X)
return dL_dZ
def _log_likelihood_gradients(self):
if not self.EP:
return np.hstack([self.dL_dZ().flatten(), self.dL_dbeta(), self.dL_dtheta()])
else:
return np.hstack([self.dL_dZ().flatten(), self.dL_dtheta()])
def _raw_predict(self, Xnew, slices, full_cov=False):
"""Internal helper function for making predictions, does not account for normalisation"""
Kx = self.kern.K(self.Z, Xnew)
mu = mdot(Kx.T, self.LBL_inv, self.psi1V)
phi = None
mu = mdot(Kx.T, self.C/self.scale_factor, self.psi1V)
if full_cov:
Kxx = self.kern.K(Xnew)
var = Kxx - mdot(Kx.T, (self.Kmmi - self.LBL_inv), Kx)
if not self.EP:
var += np.eye(Xnew.shape[0])/self.beta
else:
raise NotImplementedError, "full_cov = True not implemented for EP"
var = Kxx - mdot(Kx.T, (self.Kmmi - self.C/self.scale_factor**2), Kx)
else:
Kxx = self.kern.Kdiag(Xnew)
var = Kxx - np.sum(Kx*np.dot(self.Kmmi - self.LBL_inv, Kx),0)
if not self.EP:
var += 1./self.beta
else:
phi = self.likelihood.predictive_mean(mu,var)
return mu,var,phi
var = Kxx - np.sum(Kx*np.dot(self.Kmmi - self.C/self.scale_factor**2, Kx),0)
return mu,var
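Reading _raw_predict above together with the identity C/sf^2 = (K_mm + beta*Psi1*Psi1^T)^{-1} (an editorial transcription, not part of the diff), and recalling V = (beta/sf)*Y and Psi1 = K(Z, X), the returned quantities are the standard DTC/projected-process predictions at test inputs X_*:
$$
\mu_* = \beta\, K_{*m}\left(K_{mm} + \beta\,\Psi_1\Psi_1^{\top}\right)^{-1}\Psi_1 Y, \qquad \mathrm{var}_* = K_{**} - K_{*m}\left(K_{mm}^{-1} - \left(K_{mm} + \beta\,\Psi_1\Psi_1^{\top}\right)^{-1}\right)K_{m*},
$$
where var_* is the latent (noise-free) variance; unlike the old code path, no 1/beta noise term is added here.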
def plot(self, *args, **kwargs):
"""
Plot the fitted model: just call the GP_regression plot function and then add inducing inputs
"""
GP.plot(self,*args,**kwargs)
GP_regression.plot(self,*args,**kwargs)
if self.Q==1:
pb.plot(self.Z,self.Z*0+pb.ylim()[0],'k|',mew=1.5,markersize=12)
if self.has_uncertain_inputs:

GPy/models/sparse_GP_old.py (new file, 258 lines)

@@ -0,0 +1,258 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
import pylab as pb
from ..util.linalg import mdot, jitchol, chol_inv, pdinv
from ..util.plot import gpplot
from .. import kern
from GP import GP
#Still TODO:
# make use of slices properly (kernel can now do this)
# enable heteroscedastic noise (kernel will need to compute psi2 as a (NxMxM) array)
class sparse_GP(GP):
"""
Variational sparse GP model (Regression)
:param X: inputs
:type X: np.ndarray (N x Q)
:param Y: observed data
:type Y: np.ndarray of observations (N x D)
:param kernel : the kernel/covariance function. See link kernels
:type kernel: a GPy kernel
:param Z: inducing inputs (optional, see note)
:type Z: np.ndarray (M x Q) | None
:param X_uncertainty: The uncertainty in the measurements of X (Gaussian variance)
:type X_uncertainty: np.ndarray (N x Q) | None
:param Zslices: slices for the inducing inputs (see slicing TODO: link)
:param M : Number of inducing points (optional, default 10. Ignored if Z is not None)
:type M: int
:param beta: noise precision. TODO> ignore beta if doing EP
:type beta: float
:param normalize_(X|Y) : whether to normalize the data before computing (predictions will be in original scales)
:type normalize_(X|Y): bool
"""
def __init__(self,X,Y=None,kernel=None,X_uncertainty=None,beta=100.,Z=None,Zslices=None,M=10,normalize_X=False,normalize_Y=False,likelihood=None,method_ep='DTC',epsilon_ep=1e-3,power_ep=[1.,1.]):
if Z is None:
self.Z = np.random.permutation(X.copy())[:M]
self.M = M
else:
assert Z.shape[1]==X.shape[1]
self.Z = Z
self.M = Z.shape[0]
if X_uncertainty is None:
self.has_uncertain_inputs=False
else:
assert X_uncertainty.shape==X.shape
self.has_uncertain_inputs=True
self.X_uncertainty = X_uncertainty
GP.__init__(self, X=X, Y=Y, kernel=kernel, normalize_X=normalize_X, normalize_Y=normalize_Y,likelihood=likelihood,epsilon_ep=epsilon_ep,power_ep=power_ep)
#normalise X uncertainty also
if self.has_uncertain_inputs:
self.X_uncertainty /= np.square(self._Xstd)
if not self.EP:
self.trYYT = np.sum(np.square(self.Y))
else:
self.method_ep = method_ep
#normalise X uncertainty also
if self.has_uncertain_inputs:
self.X_uncertainty /= np.square(self._Xstd)
def _set_params(self, p):
self.Z = p[:self.M*self.Q].reshape(self.M, self.Q)
if not self.EP:
self.beta = p[self.M*self.Q]
self.kern._set_params(p[self.Z.size + 1:])
else:
self.kern._set_params(p[self.Z.size:])
if self.Y is None:
self.Y = np.ones([self.N,1])
self._compute_kernel_matrices()
self._computations()
def _get_params(self):
if not self.EP:
return np.hstack([self.Z.flatten(),self.beta,self.kern._get_params_transformed()])
else:
return np.hstack([self.Z.flatten(),self.kern._get_params_transformed()])
def _get_param_names(self):
if not self.EP:
return sum([['iip_%i_%i'%(i,j) for i in range(self.Z.shape[0])] for j in range(self.Z.shape[1])],[]) + ['noise_precision']+self.kern._get_param_names_transformed()
else:
return sum([['iip_%i_%i'%(i,j) for i in range(self.Z.shape[0])] for j in range(self.Z.shape[1])],[]) + self.kern._get_param_names_transformed()
def _compute_kernel_matrices(self):
# kernel computations, using BGPLVM notation
#TODO: slices for psi statistics (easy enough)
self.Kmm = self.kern.K(self.Z)
if self.has_uncertain_inputs:
if not self.EP:
self.psi0 = self.kern.psi0(self.Z,self.X, self.X_uncertainty)#.sum() NOTE psi0 is now a vector
self.psi1 = self.kern.psi1(self.Z,self.X, self.X_uncertainty).T
self.psi2 = self.kern.psi2(self.Z,self.X, self.X_uncertainty)
#self.psi2_beta_scaled = ?
else:
raise NotImplementedError, "uncertain_inputs not yet supported for EP"
else:
self.psi0 = self.kern.Kdiag(self.X,slices=self.Xslices)#.sum()
self.psi1 = self.kern.K(self.Z,self.X)
self.psi2 = np.dot(self.psi1,self.psi1.T)
self.psi2_beta_scaled = np.dot(self.psi1,self.beta*self.psi1.T)
def _computations(self):
# TODO find routine to multiply triangular matrices
self.V = self.beta*self.Y
self.psi1V = np.dot(self.psi1, self.V)
self.psi1VVpsi1 = np.dot(self.psi1V, self.psi1V.T)
self.Kmmi, self.Lm, self.Lmi, self.Kmm_logdet = pdinv(self.Kmm)
self.A = mdot(self.Lmi, self.psi2_beta_scaled, self.Lmi.T)
self.B = np.eye(self.M) + self.A
self.Bi, self.LB, self.LBi, self.B_logdet = pdinv(self.B)
self.LLambdai = np.dot(self.LBi, self.Lmi)
self.LBL_inv = mdot(self.Lmi.T, self.Bi, self.Lmi)
self.C = mdot(self.LLambdai, self.psi1V)
self.G = mdot(self.LBL_inv, self.psi1VVpsi1, self.LBL_inv.T)
self.trace_K_beta_scaled = (self.psi0*self.beta).sum() - np.trace(self.A)
if not self.EP:
self.trace_K = self.psi0.sum() - np.trace(self.A)/self.beta
# Compute dL_dpsi
self.dL_dpsi1 = mdot(self.LLambdai.T,self.C,self.V.T)
if not self.EP:
self.dL_dpsi0 = - 0.5 * self.D * self.beta * np.ones(self.N)
if self.has_uncertain_inputs:
self.dL_dpsi2 = - 0.5 * self.beta * (self.D*(self.LBL_inv - self.Kmmi) + self.G)
else:
self.dL_dpsi2_ = - 0.5 * (self.D*(self.LBL_inv - self.Kmmi) + self.G)
else:
self.dL_dpsi0 = - 0.5 * self.D * self.beta.flatten()
if not self.has_uncertain_inputs:
self.dL_dpsi2_ = - 0.5 * (self.D*(self.LBL_inv - self.Kmmi) + self.G)
# Compute dL_dKmm
self.dL_dKmm = -0.5 * self.D * mdot(self.Lmi.T, self.A, self.Lmi) # dB
self.dL_dKmm += -0.5 * self.D * (- self.LBL_inv - 2.*mdot(self.LBL_inv, self.psi2_beta_scaled, self.Kmmi) + self.Kmmi) # dC
self.dL_dKmm += np.dot(np.dot(self.G,self.psi2_beta_scaled) - np.dot(self.LBL_inv, self.psi1VVpsi1), self.Kmmi) + 0.5*self.G # dE
def approximate_likelihood(self):
assert not isinstance(self.likelihood, gaussian), "EP is only available for non-gaussian likelihoods"
if self.method_ep == 'DTC':
self.ep_approx = DTC(self.Kmm,self.likelihood,self.psi1,epsilon=self.epsilon_ep,power_ep=[self.eta,self.delta])
elif self.method_ep == 'FITC':
self.ep_approx = FITC(self.Kmm,self.likelihood,self.psi1,self.psi0,epsilon=self.epsilon_ep,power_ep=[self.eta,self.delta])
else:
self.ep_approx = Full(self.X,self.likelihood,self.kernel,inducing=None,epsilon=self.epsilon_ep,power_ep=[self.eta,self.delta])
self.beta, self.Y, self.Z_ep = self.ep_approx.fit_EP()
self.trbetaYYT = np.sum(np.square(self.Y)*self.beta)
self._computations()
def log_likelihood(self):
"""
Compute the (lower bound on the) log marginal likelihood
"""
if not self.EP:
A = -0.5*self.N*self.D*(np.log(2.*np.pi) - np.log(self.beta))
D = -0.5*self.beta*self.trYYT
else:
A = -0.5*self.D*(self.N*np.log(2.*np.pi) - np.sum(np.log(self.beta)))
D = -0.5*self.trbetaYYT
B = -0.5*self.D*self.trace_K_beta_scaled
C = -0.5*self.D * self.B_logdet
E = +0.5*np.sum(self.psi1VVpsi1 * self.LBL_inv)
return A+B+C+D+E
def dL_dbeta(self):
"""
Compute the gradient of the log likelihood wrt beta.
"""
#TODO: support heteroscedastic noise
dA_dbeta = 0.5 * self.N*self.D/self.beta
dB_dbeta = - 0.5 * self.D * self.trace_K
dC_dbeta = - 0.5 * self.D * np.sum(self.Bi*self.A)/self.beta
dD_dbeta = - 0.5 * self.trYYT
tmp = mdot(self.LBi.T, self.LLambdai, self.psi1V)
dE_dbeta = (np.sum(np.square(self.C)) - 0.5 * np.sum(self.A * np.dot(tmp, tmp.T)))/self.beta
return np.squeeze(dA_dbeta + dB_dbeta + dC_dbeta + dD_dbeta + dE_dbeta)
def dL_dtheta(self):
"""
Compute and return the derivative of the log marginal likelihood wrt the parameters of the kernel
"""
dL_dtheta = self.kern.dK_dtheta(self.dL_dKmm,self.Z)
if self.has_uncertain_inputs:
dL_dtheta += self.kern.dpsi0_dtheta(self.dL_dpsi0, self.Z,self.X,self.X_uncertainty)
dL_dtheta += self.kern.dpsi1_dtheta(self.dL_dpsi1.T,self.Z,self.X, self.X_uncertainty)
dL_dtheta += self.kern.dpsi2_dtheta(self.dL_dpsi2,self.Z,self.X, self.X_uncertainty) # for multiple_beta, dL_dpsi2 will be a different shape
else:
#re-cast computations in psi2 back to psi1:
dL_dpsi1 = self.dL_dpsi1 + 2.*np.dot(self.dL_dpsi2_,self.beta.T*self.psi1) #dL_dpsi1 = self.dL_dpsi1 + 2.*np.dot(self.dL_dpsi2,self.psi1)
dL_dtheta += self.kern.dK_dtheta(dL_dpsi1,self.Z,self.X)
dL_dtheta += self.kern.dKdiag_dtheta(self.dL_dpsi0, self.X)
return dL_dtheta
def dL_dZ(self):
"""
The derivative of the bound wrt the inducing inputs Z
"""
dL_dZ = 2.*self.kern.dK_dX(self.dL_dKmm,self.Z,)#factor of two because of vertical and horizontal 'stripes' in dKmm_dZ
if self.has_uncertain_inputs:
dL_dZ += self.kern.dpsi1_dZ(self.dL_dpsi1.T,self.Z,self.X, self.X_uncertainty)
dL_dZ += self.kern.dpsi2_dZ(self.dL_dpsi2,self.Z,self.X, self.X_uncertainty)
else:
#re-cast computations in psi2 back to psi1:
dL_dpsi1 = self.dL_dpsi1 + 2.*np.dot(self.dL_dpsi2_,self.beta.T*self.psi1)#dL_dpsi1 = self.dL_dpsi1 + 2.*np.dot(self.dL_dpsi2,self.psi1)
dL_dZ += self.kern.dK_dX(dL_dpsi1,self.Z,self.X)
return dL_dZ
def _log_likelihood_gradients(self):
if not self.EP:
return np.hstack([self.dL_dZ().flatten(), self.dL_dbeta(), self.dL_dtheta()])
else:
return np.hstack([self.dL_dZ().flatten(), self.dL_dtheta()])
def _raw_predict(self, Xnew, slices, full_cov=False):
"""Internal helper function for making predictions, does not account for normalisation"""
Kx = self.kern.K(self.Z, Xnew)
mu = mdot(Kx.T, self.LBL_inv, self.psi1V)
phi = None
if full_cov:
Kxx = self.kern.K(Xnew)
var = Kxx - mdot(Kx.T, (self.Kmmi - self.LBL_inv), Kx)
if not self.EP:
var += np.eye(Xnew.shape[0])/self.beta
else:
raise NotImplementedError, "full_cov = True not implemented for EP"
else:
Kxx = self.kern.Kdiag(Xnew)
var = Kxx - np.sum(Kx*np.dot(self.Kmmi - self.LBL_inv, Kx),0)
if not self.EP:
var += 1./self.beta
else:
phi = self.likelihood.predictive_mean(mu,var)
return mu,var,phi
def plot(self, *args, **kwargs):
"""
Plot the fitted model: just call the GP_regression plot function and then add inducing inputs
"""
GP.plot(self,*args,**kwargs)
if self.Q==1:
pb.plot(self.Z,self.Z*0+pb.ylim()[0],'k|',mew=1.5,markersize=12)
if self.has_uncertain_inputs:
pb.errorbar(self.X[:,0], pb.ylim()[0]+np.zeros(self.N), xerr=2*np.sqrt(self.X_uncertainty.flatten()))
if self.Q==2:
pb.plot(self.Z[:,0],self.Z[:,1],'wo')


@@ -1,205 +0,0 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
import pylab as pb
from ..util.linalg import mdot, jitchol, chol_inv, pdinv
from ..util.plot import gpplot
from .. import kern
from ..inference.likelihoods import likelihood
from GP_regression import GP_regression
#Still TODO:
# make use of slices properly (kernel can now do this)
# enable heteroscedastic noise (kernel will need to compute psi2 as a (NxMxM) array)
class sparse_GP_regression(GP_regression):
"""
Variational sparse GP model (Regression)
:param X: inputs
:type X: np.ndarray (N x Q)
:param Y: observed data
:type Y: np.ndarray of observations (N x D)
:param kernel : the kernel/covariance function. See link kernels
:type kernel: a GPy kernel
:param Z: inducing inputs (optional, see note)
:type Z: np.ndarray (M x Q) | None
:param X_uncertainty: The uncertainty in the measurements of X (Gaussian variance)
:type X_uncertainty: np.ndarray (N x Q) | None
:param Zslices: slices for the inducing inputs (see slicing TODO: link)
:param M : Number of inducing points (optional, default 10. Ignored if Z is not None)
:type M: int
:param beta: noise precision. TODO> ignore beta if doing EP
:type beta: float
:param normalize_(X|Y) : whether to normalize the data before computing (predictions will be in original scales)
:type normalize_(X|Y): bool
"""
def __init__(self,X,Y,kernel=None, X_uncertainty=None, beta=100., Z=None,Zslices=None,M=10,normalize_X=False,normalize_Y=False):
self.scale_factor = 1000.0
self.beta = beta
if Z is None:
self.Z = np.random.permutation(X.copy())[:M]
self.M = M
else:
assert Z.shape[1]==X.shape[1]
self.Z = Z
self.M = Z.shape[0]
if X_uncertainty is None:
self.has_uncertain_inputs=False
else:
assert X_uncertainty.shape==X.shape
self.has_uncertain_inputs=True
self.X_uncertainty = X_uncertainty
GP_regression.__init__(self, X, Y, kernel=kernel, normalize_X=normalize_X, normalize_Y=normalize_Y)
self.trYYT = np.sum(np.square(self.Y))
#normalise X uncertainty also
if self.has_uncertain_inputs:
self.X_uncertainty /= np.square(self._Xstd)
def _computations(self):
# TODO find routine to multiply triangular matrices
#TODO: slices for psi statistics (easy enough)
# kernel computations, using BGPLVM notation
self.Kmm = self.kern.K(self.Z)
if self.has_uncertain_inputs:
self.psi0 = self.kern.psi0(self.Z,self.X, self.X_uncertainty).sum()
self.psi1 = self.kern.psi1(self.Z,self.X, self.X_uncertainty).T
self.psi2 = self.kern.psi2(self.Z,self.X, self.X_uncertainty)
self.psi2_beta_scaled = (self.psi2*(self.beta/self.scale_factor**2)).sum(0)
else:
self.psi0 = self.kern.Kdiag(self.X,slices=self.Xslices).sum()
self.psi1 = self.kern.K(self.Z,self.X)
#self.psi2 = np.dot(self.psi1,self.psi1.T)
#self.psi2 = self.psi1.T[:,:,None]*self.psi1.T[:,None,:]
tmp = self.psi1/(self.scale_factor/np.sqrt(self.beta))
self.psi2_beta_scaled = np.dot(tmp,tmp.T)
sf = self.scale_factor
sf2 = sf**2
self.Kmmi, self.Lm, self.Lmi, self.Kmm_logdet = pdinv(self.Kmm)#+np.eye(self.M)*1e-3)
self.V = (self.beta/self.scale_factor)*self.Y
self.A = mdot(self.Lmi, self.psi2_beta_scaled, self.Lmi.T)
self.B = np.eye(self.M)/sf2 + self.A
self.Bi, self.LB, self.LBi, self.B_logdet = pdinv(self.B)
self.psi1V = np.dot(self.psi1, self.V)
self.psi1VVpsi1 = np.dot(self.psi1V, self.psi1V.T)
self.C = mdot(self.Lmi.T, self.Bi, self.Lmi)
self.E = mdot(self.C, self.psi1VVpsi1/sf2, self.C.T)
# Compute dL_dpsi
self.dL_dpsi0 = - 0.5 * self.D * self.beta * np.ones(self.N)
self.dL_dpsi1 = mdot(self.V, self.psi1V.T,self.C).T
self.dL_dpsi2 = 0.5 * self.beta * self.D * self.Kmmi[None,:,:] # dB
self.dL_dpsi2 += - 0.5 * self.beta/sf2 * self.D * self.C[None,:,:] # dC
self.dL_dpsi2 += - 0.5 * self.beta * self.E[None,:,:] # dD
# Compute dL_dKmm
self.dL_dKmm = -0.5 * self.D * mdot(self.Lmi.T, self.A, self.Lmi)*sf2 # dB
self.dL_dKmm += -0.5 * self.D * (- self.C/sf2 - 2.*mdot(self.C, self.psi2_beta_scaled, self.Kmmi) + self.Kmmi) # dC
self.dL_dKmm += np.dot(np.dot(self.E*sf2, self.psi2_beta_scaled) - np.dot(self.C, self.psi1VVpsi1), self.Kmmi) + 0.5*self.E # dD
def _set_params(self, p):
self.Z = p[:self.M*self.Q].reshape(self.M, self.Q)
self.beta = p[self.M*self.Q]
self.kern._set_params(p[self.Z.size + 1:])
self._computations()
def _get_params(self):
return np.hstack([self.Z.flatten(),self.beta,self.kern._get_params_transformed()])
def _get_param_names(self):
return sum([['iip_%i_%i'%(i,j) for i in range(self.Z.shape[0])] for j in range(self.Z.shape[1])],[]) + ['noise_precision']+self.kern._get_param_names_transformed()
def log_likelihood(self):
""" Compute the (lower bound on the) log marginal likelihood """
sf2 = self.scale_factor**2
A = -0.5*self.N*self.D*(np.log(2.*np.pi) - np.log(self.beta)) -0.5*self.beta*self.trYYT
B = -0.5*self.D*(self.beta*self.psi0-np.trace(self.A)*sf2)
C = -0.5*self.D * (self.B_logdet + self.M*np.log(sf2))
D = +0.5*np.sum(self.psi1VVpsi1 * self.C)
return A+B+C+D
def _log_likelihood_gradients(self):
return np.hstack([self.dL_dZ().flatten(), self.dL_dbeta(), self.dL_dtheta()])
def dL_dbeta(self):
"""
Compute the gradient of the log likelihood wrt beta.
"""
#TODO: support heteroscedastic noise
sf2 = self.scale_factor**2
dA_dbeta = 0.5 * self.N*self.D/self.beta - 0.5 * self.trYYT
dB_dbeta = - 0.5 * self.D * (self.psi0 - np.trace(self.A)/self.beta*sf2)
dC_dbeta = - 0.5 * self.D * np.sum(self.Bi*self.A)/self.beta
dD_dbeta = np.sum((self.C - 0.5 * mdot(self.C,self.psi2_beta_scaled,self.C) ) * self.psi1VVpsi1 )/self.beta
return np.squeeze(dA_dbeta + dB_dbeta + dC_dbeta + dD_dbeta)
def dL_dtheta(self):
"""
Compute and return the derivative of the log marginal likelihood wrt the parameters of the kernel
"""
dL_dtheta = self.kern.dK_dtheta(self.dL_dKmm,self.Z)
if self.has_uncertain_inputs:
dL_dtheta += self.kern.dpsi0_dtheta(self.dL_dpsi0, self.Z,self.X,self.X_uncertainty)
dL_dtheta += self.kern.dpsi1_dtheta(self.dL_dpsi1.T,self.Z,self.X, self.X_uncertainty)
dL_dtheta += self.kern.dpsi2_dtheta(self.dL_dpsi2,self.dL_dpsi1.T, self.Z,self.X, self.X_uncertainty) # for multiple_beta, dL_dpsi2 will be a different shape
else:
#re-cast computations in psi2 back to psi1:
dL_dpsi1 = self.dL_dpsi1 + 2.*np.dot(self.dL_dpsi2.sum(0),self.psi1)
dL_dtheta += self.kern.dK_dtheta(dL_dpsi1,self.Z,self.X)
dL_dtheta += self.kern.dKdiag_dtheta(self.dL_dpsi0, self.X)
return dL_dtheta
def dL_dZ(self):
"""
The derivative of the bound wrt the inducing inputs Z
"""
dL_dZ = 2.*self.kern.dK_dX(self.dL_dKmm,self.Z)#factor of two because of vertical and horizontal 'stripes' in dKmm_dZ
if self.has_uncertain_inputs:
dL_dZ += self.kern.dpsi1_dZ(self.dL_dpsi1,self.Z,self.X, self.X_uncertainty)
dL_dZ += 2.*self.kern.dpsi2_dZ(self.dL_dpsi2,self.Z,self.X, self.X_uncertainty) # 'stripes'
else:
#re-cast computations in psi2 back to psi1:
dL_dpsi1 = self.dL_dpsi1 + 2.*np.dot(self.dL_dpsi2.sum(0),self.psi1)
dL_dZ += self.kern.dK_dX(dL_dpsi1,self.Z,self.X)
return dL_dZ
def _raw_predict(self, Xnew, slices, full_cov=False):
"""Internal helper function for making predictions, does not account for normalisation"""
Kx = self.kern.K(self.Z, Xnew)
mu = mdot(Kx.T, self.C/self.scale_factor, self.psi1V)
if full_cov:
Kxx = self.kern.K(Xnew)
var = Kxx - mdot(Kx.T, (self.Kmmi - self.C/self.scale_factor**2), Kx) + np.eye(Xnew.shape[0])/self.beta # TODO: This beta doesn't belong here in the EP case.
else:
Kxx = self.kern.Kdiag(Xnew)
var = Kxx - np.sum(Kx*np.dot(self.Kmmi - self.C/self.scale_factor**2, Kx),0) + 1./self.beta # TODO: This beta doesn't belong here in the EP case.
return mu,var
def plot(self, *args, **kwargs):
"""
Plot the fitted model: just call the GP_regression plot function and then add inducing inputs
"""
GP_regression.plot(self,*args,**kwargs)
if self.Q==1:
pb.plot(self.Z,self.Z*0+pb.ylim()[0],'k|',mew=1.5,markersize=12)
if self.has_uncertain_inputs:
pb.errorbar(self.X[:,0], pb.ylim()[0]+np.zeros(self.N), xerr=2*np.sqrt(self.X_uncertainty.flatten()))
if self.Q==2:
pb.plot(self.Z[:,0],self.Z[:,1],'wo')