Merge branch 'newGP' of github.com:SheffieldML/GPy into newGP

Conflicts:
	GPy/likelihoods/EP.py
	GPy/likelihoods/likelihood_functions.py
This commit is contained in:
Ricardo Andrade 2013-02-01 13:19:59 +00:00
commit 879fa138e1
7 changed files with 464 additions and 369 deletions

View file

@ -1,11 +1,9 @@
import numpy as np import numpy as np
import random
from scipy import stats, linalg from scipy import stats, linalg
#from ..core import model
from ..util.linalg import pdinv,mdot,jitchol from ..util.linalg import pdinv,mdot,jitchol
from ..util.plot import gpplot from likelihood import likelihood
class EP: class EP(likelihood):
def __init__(self,data,likelihood_function,epsilon=1e-3,power_ep=[1.,1.]): def __init__(self,data,likelihood_function,epsilon=1e-3,power_ep=[1.,1.]):
""" """
Expectation Propagation Expectation Propagation
@ -20,11 +18,10 @@ class EP:
self.eta, self.delta = power_ep self.eta, self.delta = power_ep
self.data = data self.data = data
self.N = self.data.size self.N = self.data.size
self.is_heteroscedastic = True
""" #Initial values - Likelihood approximation parameters:
Initial values - Likelihood approximation parameters: #p(y|f) = t(f|tau_tilde,v_tilde)
p(y|f) = t(f|tau_tilde,v_tilde)
"""
self.tau_tilde = np.zeros(self.N) self.tau_tilde = np.zeros(self.N)
self.v_tilde = np.zeros(self.N) self.v_tilde = np.zeros(self.N)
@ -51,9 +48,11 @@ class EP:
mu_tilde = self.v_tilde/self.tau_tilde #When calling EP, this variable is used instead of Y in the GP model mu_tilde = self.v_tilde/self.tau_tilde #When calling EP, this variable is used instead of Y in the GP model
sigma_sum = 1./self.tau_ + 1./self.tau_tilde sigma_sum = 1./self.tau_ + 1./self.tau_tilde
mu_diff_2 = (self.v_/self.tau_ - mu_tilde)**2 mu_diff_2 = (self.v_/self.tau_ - mu_tilde)**2
Z_ep = np.sum(np.log(self.Z_hat)) + 0.5*np.sum(np.log(sigma_sum)) + 0.5*np.sum(mu_diff_2/sigma_sum) #Normalization constant self.Z = np.sum(np.log(self.Z_hat)) + 0.5*np.sum(np.log(sigma_sum)) + 0.5*np.sum(mu_diff_2/sigma_sum) #Normalization constant, aka Z_ep
self.Y, self.beta, self.Z = mu_tilde[:,None],self.tau_tilde[:,None], Z_ep
self.variance = np.diag(1./self.beta.flatten()) self.Y = mu_tilde[:,None]
self.precsion = self.tau_tilde[:,None]
self.covariance_matrix = np.diag(1./self.precision)
def fit_full(self,K): def fit_full(self,K):
""" """

View file

@ -1,7 +1,9 @@
import numpy as np import numpy as np
from likelihood import likelihood
class Gaussian: class Gaussian(likelihood):
def __init__(self,data,variance=1.,normalize=False): def __init__(self,data,variance=1.,normalize=False):
self.is_heteroscedastic = False
self.data = data self.data = data
self.N,D = data.shape self.N,D = data.shape
self.Z = 0. # a correction factor which accounts for the approximation made self.Z = 0. # a correction factor which accounts for the approximation made
@ -19,6 +21,7 @@ class Gaussian:
self.YYT = np.dot(self.Y,self.Y.T) self.YYT = np.dot(self.Y,self.Y.T)
self._set_params(np.asarray(variance)) self._set_params(np.asarray(variance))
def _get_params(self): def _get_params(self):
return np.asarray(self._variance) return np.asarray(self._variance)
@ -27,7 +30,8 @@ class Gaussian:
def _set_params(self,x): def _set_params(self,x):
self._variance = x self._variance = x
self.variance = np.eye(self.N)*self._variance self.covariance_matrix = np.eye(self.N)*self._variance
self.precision = 1./self._variance
def predictive_values(self,mu,var): def predictive_values(self,mu,var):
""" """

View file

@ -0,0 +1,35 @@
import numpy as np
class likelihood:
"""
The atom for a likelihood class
This object interfaces the GP and the data. The most basic likelihood
(Gaussian) inherits directly from this, as does the EP algorithm
Some things must be defined for this to work properly:
self.Y : the effective Gaussian target of the GP
self.N, self.D : Y.shape
self.covariance_matrix : the effective (noise) covariance of the GP targets
self.Z : a factor which gets added to the likelihood (0 for a Gaussian, Z_EP for EP)
self.is_heteroscedastic : enables significant computational savings in GP
self.precision : a scalar or vector representation of the effective target precision
self.YYT : (optional) = np.dot(self.Y, self.Y.T) enables computational savings for D>N
"""
def __init__(self,data):
raise ValueError, "this class is not to be instantiated"
def _get_params(self):
raise NotImplementedError
def _get_param_names(self):
raise NotImplementedError
def _set_params(self,x):
raise NotImplementedError
def fit(self):
raise NotImplementedError
def _gradients(self,partial):
raise NotImplementedError

View file

@ -9,18 +9,22 @@ import pylab as pb
from ..util.plot import gpplot from ..util.plot import gpplot
#from . import EP #from . import EP
class likelihood: class likelihood_function:
""" """
Likelihood class for doing Expectation propagation Likelihood class for doing Expectation propagation
:param Y: observed output (Nx1 numpy.darray) :param Y: observed output (Nx1 numpy.darray)
..Note:: Y values allowed depend on the likelihood used ..Note:: Y values allowed depend on the likelihood_function used
""" """
def __init__(self,location=0,scale=1): def __init__(self,location=0,scale=1):
self.location = location self.location = location
self.scale = scale self.scale = scale
<<<<<<< HEAD
class Probit(likelihood): class Probit(likelihood):
=======
class probit(likelihood_function):
>>>>>>> 346f9dd8bd3207959b87ded258e55aeb094f1ea3
""" """
Probit likelihood Probit likelihood
Y is expected to take values in {-1,1} Y is expected to take values in {-1,1}
@ -29,6 +33,11 @@ class Probit(likelihood):
L(x) = \\Phi (Y_i*f_i) L(x) = \\Phi (Y_i*f_i)
$$ $$
""" """
<<<<<<< HEAD
=======
def __init__(self,location=0,scale=1):
likelihood_function.__init__(self,Y,location,scale)
>>>>>>> 346f9dd8bd3207959b87ded258e55aeb094f1ea3
def moments_match(self,data_i,tau_i,v_i): def moments_match(self,data_i,tau_i,v_i):
""" """
@ -57,7 +66,11 @@ class Probit(likelihood):
p_95 = np.ones([mu.size]) p_95 = np.ones([mu.size])
return mean, p_05, p_95 return mean, p_05, p_95
<<<<<<< HEAD
class Poisson(likelihood): class Poisson(likelihood):
=======
class poisson(likelihood_function):
>>>>>>> 346f9dd8bd3207959b87ded258e55aeb094f1ea3
""" """
Poisson likelihood Poisson likelihood
Y is expected to take values in {0,1,2,...} Y is expected to take values in {0,1,2,...}
@ -66,6 +79,12 @@ class Poisson(likelihood):
L(x) = \exp(\lambda) * \lambda**Y_i / Y_i! L(x) = \exp(\lambda) * \lambda**Y_i / Y_i!
$$ $$
""" """
<<<<<<< HEAD
=======
def __init__(self,Y,location=0,scale=1):
assert len(Y[Y<0]) == 0, "Output cannot have negative values"
likelihood_function.__init__(self,Y,location,scale)
>>>>>>> 346f9dd8bd3207959b87ded258e55aeb094f1ea3
def moments_match(self,i,tau_i,v_i): def moments_match(self,i,tau_i,v_i):
""" """
@ -129,7 +148,54 @@ class Poisson(likelihood):
Compute mean, and conficence interval (percentiles 5 and 95) of the prediction Compute mean, and conficence interval (percentiles 5 and 95) of the prediction
""" """
mean = np.exp(mu*self.scale + self.location) mean = np.exp(mu*self.scale + self.location)
<<<<<<< HEAD
tmp = stats.poisson.ppf(np.array([.05,.95]),mu) tmp = stats.poisson.ppf(np.array([.05,.95]),mu)
p_05 = tmp[:,0] p_05 = tmp[:,0]
p_95 = tmp[:,1] p_95 = tmp[:,1]
return mean,p_05,p_95 return mean,p_05,p_95
=======
if all:
tmp = stats.poisson.ppf(np.array([.05,.95]),mu)
p_05 = tmp[:,0]
p_95 = tmp[:,1]
return mean,mean,p_05,p_95
else:
return mean
def _log_likelihood_gradients():
raise NotImplementedError
def plot(self,X,mu,var,phi,X_obs,Z=None,samples=0):
assert X_obs.shape[1] == 1, 'Number of dimensions must be 1'
gpplot(X,phi,phi.flatten())
pb.plot(X_obs,self.Y,'kx',mew=1.5)
if samples:
phi_samples = np.vstack([np.random.poisson(phi.flatten(),phi.size) for s in range(samples)])
pb.plot(X,phi_samples.T, alpha = 0.4, c='#3465a4', linewidth = 0.8)
if Z is not None:
pb.plot(Z,Z*0+pb.ylim()[0],'k|',mew=1.5,markersize=12)
class gaussian(likelihood_function):
"""
Gaussian likelihood
Y is expected to take values in (-inf,inf)
"""
def moments_match(self,i,tau_i,v_i):
"""
Moments match of the marginal approximation in EP algorithm
:param i: number of observation (int)
:param tau_i: precision of the cavity distribution (float)
:param v_i: mean/variance of the cavity distribution (float)
"""
mu = v_i/tau_i
sigma = np.sqrt(1./tau_i)
s = 1. if self.Y[i] == 0 else 1./self.Y[i]
sigma2_hat = 1./(1./sigma**2 + 1./s**2)
mu_hat = sigma2_hat*(mu/sigma**2 + self.Y[i]/s**2)
Z_hat = 1./np.sqrt(2*np.pi) * 1./np.sqrt(sigma**2+s**2) * np.exp(-.5*(mu-self.Y[i])**2/(sigma**2 + s**2))
return Z_hat, mu_hat, sigma2_hat
def _log_likelihood_gradients():
raise NotImplementedError
>>>>>>> 346f9dd8bd3207959b87ded258e55aeb094f1ea3

View file

@ -6,37 +6,36 @@ import pylab as pb
from ..util.linalg import mdot, jitchol, chol_inv, pdinv from ..util.linalg import mdot, jitchol, chol_inv, pdinv
from ..util.plot import gpplot from ..util.plot import gpplot
from .. import kern from .. import kern
from ..inference.likelihoods import likelihood
from GP import GP from GP import GP
#Still TODO: #Still TODO:
# make use of slices properly (kernel can now do this) # make use of slices properly (kernel can now do this)
# enable heteroscedatic noise (kernel will need to compute psi2 as a (NxMxM) array) # enable heteroscedatic noise (kernel will need to compute psi2 as a (NxMxM) array)
class sparse_GP(GP): class sparse_GP(GP):
""" """
Variational sparse GP model (Regression) Variational sparse GP model
:param X: inputs :param X: inputs
:type X: np.ndarray (N x Q) :type X: np.ndarray (N x Q)
:param Y: observed data :param likelihood: a likelihood instance, containing the observed data
:type Y: np.ndarray of observations (N x D) :type likelihood: GPy.likelihood.(Gaussian | EP)
:param kernel : the kernel/covariance function. See link kernels :param kernel : the kernel/covariance function. See link kernels
:type kernel: a GPy kernel :type kernel: a GPy kernel
:param Z: inducing inputs (optional, see note)
:type Z: np.ndarray (M x Q) | None
:param X_uncertainty: The uncertainty in the measurements of X (Gaussian variance) :param X_uncertainty: The uncertainty in the measurements of X (Gaussian variance)
:type X_uncertainty: np.ndarray (N x Q) | None :type X_uncertainty: np.ndarray (N x Q) | None
:param Z: inducing inputs (optional, see note)
:type Z: np.ndarray (M x Q) | None
:param Zslices: slices for the inducing inputs (see slicing TODO: link) :param Zslices: slices for the inducing inputs (see slicing TODO: link)
:param M : Number of inducing points (optional, default 10. Ignored if Z is not None) :param M : Number of inducing points (optional, default 10. Ignored if Z is not None)
:type M: int :type M: int
:param beta: noise precision. TODO> ignore beta if doing EP
:type beta: float
:param normalize_(X|Y) : whether to normalize the data before computing (predictions will be in original scales) :param normalize_(X|Y) : whether to normalize the data before computing (predictions will be in original scales)
:type normalize_(X|Y): bool :type normalize_(X|Y): bool
""" """
def __init__(self,X,Y=None,kernel=None,X_uncertainty=None,beta=100.,Z=None,Zslices=None,M=10,normalize_X=False,normalize_Y=False,likelihood=None,method_ep='DTC',epsilon_ep=1e-3,power_ep=[1.,1.]): def __init__(self,X,likelihood,kernel, X_uncertainty=None, Z=None,Zslices=None,M=10,normalize_X=False):
self.scale_factor = 1000.0# a scaling factor to help keep the algorithm stable
if Z is None: if Z is None:
self.Z = np.random.permutation(X.copy())[:M] self.Z = np.random.permutation(X.copy())[:M]
@ -52,140 +51,91 @@ class sparse_GP(GP):
self.has_uncertain_inputs=True self.has_uncertain_inputs=True
self.X_uncertainty = X_uncertainty self.X_uncertainty = X_uncertainty
GP.__init__(self, X=X, Y=Y, kernel=kernel, normalize_X=normalize_X, normalize_Y=normalize_Y,likelihood=likelihood,epsilon_ep=epsilon_ep,power_ep=power_ep) GP.__init__(self, X, Y, kernel=kernel, normalize_X=normalize_X, Xslices=Xslices)
#normalise X uncertainty also #normalise X uncertainty also
if self.has_uncertain_inputs: if self.has_uncertain_inputs:
self.X_uncertainty /= np.square(self._Xstd) self.X_uncertainty /= np.square(self._Xstd)
if not self.EP:
self.trYYT = np.sum(np.square(self.Y))
else:
self.method_ep = method_ep
#normalise X uncertainty also
if self.has_uncertain_inputs:
self.X_uncertainty /= np.square(self._Xstd)
def _set_params(self, p):
self.Z = p[:self.M*self.Q].reshape(self.M, self.Q)
if not self.EP:
self.beta = p[self.M*self.Q]
self.kern._set_params(p[self.Z.size + 1:])
else:
self.kern._set_params(p[self.Z.size:])
if self.Y is None:
self.Y = np.ones([self.N,1])
self._compute_kernel_matrices()
self._computations()
def _get_params(self):
if not self.EP:
return np.hstack([self.Z.flatten(),self.beta,self.kern._get_params_transformed()])
else:
return np.hstack([self.Z.flatten(),self.kern._get_params_transformed()])
def _get_param_names(self):
if not self.EP:
return sum([['iip_%i_%i'%(i,j) for i in range(self.Z.shape[0])] for j in range(self.Z.shape[1])],[]) + ['noise_precision']+self.kern._get_param_names_transformed()
else:
return sum([['iip_%i_%i'%(i,j) for i in range(self.Z.shape[0])] for j in range(self.Z.shape[1])],[]) + self.kern._get_param_names_transformed()
def _compute_kernel_matrices(self):
# kernel computations, using BGPLVM notation
#TODO: slices for psi statistics (easy enough)
self.Kmm = self.kern.K(self.Z)
if self.has_uncertain_inputs:
if not self.EP:
self.psi0 = self.kern.psi0(self.Z,self.X, self.X_uncertainty)#.sum() NOTE psi0 is now a vector
self.psi1 = self.kern.psi1(self.Z,self.X, self.X_uncertainty).T
self.psi2 = self.kern.psi2(self.Z,self.X, self.X_uncertainty)
#self.psi2_beta_scaled = ?
else:
raise NotImplementedError, "uncertain_inputs not yet supported for EP"
else:
self.psi0 = self.kern.Kdiag(self.X,slices=self.Xslices)#.sum()
self.psi1 = self.kern.K(self.Z,self.X)
self.psi2 = np.dot(self.psi1,self.psi1.T)
self.psi2_beta_scaled = np.dot(self.psi1,self.beta*self.psi1.T)
def _computations(self): def _computations(self):
# TODO find routine to multiply triangular matrices # TODO find routine to multiply triangular matrices
self.V = self.beta*self.Y #TODO: slices for psi statistics (easy enough)
sf = self.scale_factor
sf2 = sf**2
# kernel computations, using BGPLVM notation
self.Kmm = self.kern.K(self.Z)
if self.has_uncertain_inputs:
self.psi0 = self.kern.psi0(self.Z,self.X, self.X_uncertainty).sum()
self.psi1 = self.kern.psi1(self.Z,self.X, self.X_uncertainty).T
self.psi2 = self.kern.psi2(self.Z,self.X, self.X_uncertainty)
self.psi2_beta_scaled = (self.psi2*(self.beta/sf2)).sum(0)
else:
self.psi0 = self.kern.Kdiag(self.X,slices=self.Xslices).sum()
self.psi1 = self.kern.K(self.Z,self.X)
tmp = self.psi1*(np.sqrt(self.likelihood.beta)/sf)
self.psi2_beta_scaled = np.dot(tmp,tmp.T)
self.Kmmi, self.Lm, self.Lmi, self.Kmm_logdet = pdinv(self.Kmm)#+np.eye(self.M)*1e-3)
self.V = (self.likelihood.beta/self.scale_factor)*self.Y
self.A = mdot(self.Lmi, self.psi2_beta_scaled, self.Lmi.T)
self.B = np.eye(self.M)/sf2 + self.A
self.Bi, self.LB, self.LBi, self.B_logdet = pdinv(self.B)
self.psi1V = np.dot(self.psi1, self.V) self.psi1V = np.dot(self.psi1, self.V)
self.psi1VVpsi1 = np.dot(self.psi1V, self.psi1V.T) self.psi1VVpsi1 = np.dot(self.psi1V, self.psi1V.T)
self.Kmmi, self.Lm, self.Lmi, self.Kmm_logdet = pdinv(self.Kmm) self.C = mdot(self.Lmi.T, self.Bi, self.Lmi)
self.A = mdot(self.Lmi, self.psi2_beta_scaled, self.Lmi.T) self.E = mdot(self.C, self.psi1VVpsi1/sf2, self.C.T)
self.B = np.eye(self.M) + self.A
self.Bi, self.LB, self.LBi, self.B_logdet = pdinv(self.B)
self.LLambdai = np.dot(self.LBi, self.Lmi)
self.LBL_inv = mdot(self.Lmi.T, self.Bi, self.Lmi)
self.C = mdot(self.LLambdai, self.psi1V)
self.G = mdot(self.LBL_inv, self.psi1VVpsi1, self.LBL_inv.T)
self.trace_K_beta_scaled = (self.psi0*self.beta).sum() - np.trace(self.A)
if not self.EP:
self.trace_K = self.psi0.sum() - np.trace(self.A)/self.beta
# Compute dL_dpsi # Compute dL_dpsi # FIXME
self.dL_dpsi1 = mdot(self.LLambdai.T,self.C,self.V.T) self.dL_dpsi0 = - 0.5 * self.D * self.beta * np.ones(self.N)
if not self.EP: self.dL_dpsi1 = mdot(self.V, self.psi1V.T,self.C).T
self.dL_dpsi0 = - 0.5 * self.D * self.beta * np.ones(self.N) self.dL_dpsi2 = 0.5 * self.beta * self.D * self.Kmmi[None,:,:] # dB
if self.has_uncertain_inputs: self.dL_dpsi2 += - 0.5 * self.beta/sf2 * self.D * self.C[None,:,:] # dC
self.dL_dpsi2 = - 0.5 * self.beta * (self.D*(self.LBL_inv - self.Kmmi) + self.G) self.dL_dpsi2 += - 0.5 * self.beta * self.E[None,:,:] # dD
else:
self.dL_dpsi2_ = - 0.5 * (self.D*(self.LBL_inv - self.Kmmi) + self.G)
else:
self.dL_dpsi0 = - 0.5 * self.D * self.beta.flatten()
if not self.has_uncertain_inputs:
self.dL_dpsi2_ = - 0.5 * (self.D*(self.LBL_inv - self.Kmmi) + self.G)
# Compute dL_dKmm # Compute dL_dKmm
self.dL_dKmm = -0.5 * self.D * mdot(self.Lmi.T, self.A, self.Lmi) # dB self.dL_dKmm = -0.5 * self.D * mdot(self.Lmi.T, self.A, self.Lmi)*sf2 # dB
self.dL_dKmm += -0.5 * self.D * (- self.LBL_inv - 2.*mdot(self.LBL_inv, self.psi2_beta_scaled, self.Kmmi) + self.Kmmi) # dC self.dL_dKmm += -0.5 * self.D * (- self.C/sf2 - 2.*mdot(self.C, self.psi2_beta_scaled, self.Kmmi) + self.Kmmi) # dC
self.dL_dKmm += np.dot(np.dot(self.G,self.psi2_beta_scaled) - np.dot(self.LBL_inv, self.psi1VVpsi1), self.Kmmi) + 0.5*self.G # dE self.dL_dKmm += np.dot(np.dot(self.E*sf2, self.psi2_beta_scaled) - np.dot(self.C, self.psi1VVpsi1), self.Kmmi) + 0.5*self.E # dD
def approximate_likelihood(self):
assert not isinstance(self.likelihood, gaussian), "EP is only available for non-gaussian likelihoods" def _set_params(self, p):
if self.method_ep == 'DTC': self.Z = p[:self.M*self.Q].reshape(self.M, self.Q)
self.ep_approx = DTC(self.Kmm,self.likelihood,self.psi1,epsilon=self.epsilon_ep,power_ep=[self.eta,self.delta]) self.beta = p[self.M*self.Q] # FIXME
elif self.method_ep == 'FITC': self.kern._set_params(p[self.Z.size + 1:])
self.ep_approx = FITC(self.Kmm,self.likelihood,self.psi1,self.psi0,epsilon=self.epsilon_ep,power_ep=[self.eta,self.delta])
else:
self.ep_approx = Full(self.X,self.likelihood,self.kernel,inducing=None,epsilon=self.epsilon_ep,power_ep=[self.eta,self.delta])
self.beta, self.Y, self.Z_ep = self.ep_approx.fit_EP()
self.trbetaYYT = np.sum(np.square(self.Y)*self.beta)
self._computations() self._computations()
def _get_params(self):
return np.hstack([self.Z.flatten(),GP._get_params(self))
def _get_param_names(self):
return sum([['iip_%i_%i'%(i,j) for i in range(self.Z.shape[0])] for j in range(self.Z.shape[1])],[]) + GP._get_param_names(self)
def log_likelihood(self): def log_likelihood(self):
""" """ Compute the (lower bound on the) log marginal likelihood """
Compute the (lower bound on the) log marginal likelihood sf2 = self.scale_factor**2
""" A = -0.5*self.N*self.D*(np.log(2.*np.pi) - np.log(self.beta)) -0.5*self.beta*self.trYYT # FIXME
if not self.EP: B = -0.5*self.D*(self.beta*self.psi0-np.trace(self.A)*sf2)# FIXME
A = -0.5*self.N*self.D*(np.log(2.*np.pi) - np.log(self.beta)) C = -0.5*self.D * (self.B_logdet + self.M*np.log(sf2))
D = -0.5*self.beta*self.trYYT D = +0.5*np.sum(self.psi1VVpsi1 * self.C)
else: return A+B+C+D
A = -0.5*self.D*(self.N*np.log(2.*np.pi) - np.sum(np.log(self.beta)))
D = -0.5*self.trbetaYYT
B = -0.5*self.D*self.trace_K_beta_scaled
C = -0.5*self.D * self.B_logdet
E = +0.5*np.sum(self.psi1VVpsi1 * self.LBL_inv)
return A+B+C+D+E
def _log_likelihood_gradients(self):
return np.hstack([self.dL_dZ().flatten(), GP._log_likelihood_gradients(self)])
# FIXME: move this into the lieklihood class
def dL_dbeta(self): def dL_dbeta(self):
""" sf2 = self.scale_factor**2
Compute the gradient of the log likelihood wrt beta. dA_dbeta = 0.5 * self.N*self.D/self.beta - 0.5 * self.trYYT
""" dB_dbeta = - 0.5 * self.D * (self.psi0 - np.trace(self.A)/self.beta*sf2)
#TODO: suport heteroscedatic noise
dA_dbeta = 0.5 * self.N*self.D/self.beta
dB_dbeta = - 0.5 * self.D * self.trace_K
dC_dbeta = - 0.5 * self.D * np.sum(self.Bi*self.A)/self.beta dC_dbeta = - 0.5 * self.D * np.sum(self.Bi*self.A)/self.beta
dD_dbeta = - 0.5 * self.trYYT dD_dbeta = np.sum((self.C - 0.5 * mdot(self.C,self.psi2_beta_scaled,self.C) ) * self.psi1VVpsi1 )/self.beta
tmp = mdot(self.LBi.T, self.LLambdai, self.psi1V)
dE_dbeta = (np.sum(np.square(self.C)) - 0.5 * np.sum(self.A * np.dot(tmp, tmp.T)))/self.beta
return np.squeeze(dA_dbeta + dB_dbeta + dC_dbeta + dD_dbeta + dE_dbeta) return np.squeeze(dA_dbeta + dB_dbeta + dC_dbeta + dD_dbeta)
def dL_dtheta(self): def dL_dtheta(self):
""" """
@ -195,10 +145,10 @@ class sparse_GP(GP):
if self.has_uncertain_inputs: if self.has_uncertain_inputs:
dL_dtheta += self.kern.dpsi0_dtheta(self.dL_dpsi0, self.Z,self.X,self.X_uncertainty) dL_dtheta += self.kern.dpsi0_dtheta(self.dL_dpsi0, self.Z,self.X,self.X_uncertainty)
dL_dtheta += self.kern.dpsi1_dtheta(self.dL_dpsi1.T,self.Z,self.X, self.X_uncertainty) dL_dtheta += self.kern.dpsi1_dtheta(self.dL_dpsi1.T,self.Z,self.X, self.X_uncertainty)
dL_dtheta += self.kern.dpsi2_dtheta(self.dL_dpsi2,self.Z,self.X, self.X_uncertainty) # for multiple_beta, dL_dpsi2 will be a different shape dL_dtheta += self.kern.dpsi2_dtheta(self.dL_dpsi2,self.dL_dpsi1.T, self.Z,self.X, self.X_uncertainty)
else: else:
#re-cast computations in psi2 back to psi1: #re-cast computations in psi2 back to psi1:
dL_dpsi1 = self.dL_dpsi1 + 2.*np.dot(self.dL_dpsi2_,self.beta.T*self.psi1) #dL_dpsi1 = self.dL_dpsi1 + 2.*np.dot(self.dL_dpsi2,self.psi1) dL_dpsi1 = self.dL_dpsi1 + 2.*np.dot(self.dL_dpsi2.sum(0),self.psi1)
dL_dtheta += self.kern.dK_dtheta(dL_dpsi1,self.Z,self.X) dL_dtheta += self.kern.dK_dtheta(dL_dpsi1,self.Z,self.X)
dL_dtheta += self.kern.dKdiag_dtheta(self.dL_dpsi0, self.X) dL_dtheta += self.kern.dKdiag_dtheta(self.dL_dpsi0, self.X)
@ -208,48 +158,36 @@ class sparse_GP(GP):
""" """
The derivative of the bound wrt the inducing inputs Z The derivative of the bound wrt the inducing inputs Z
""" """
dL_dZ = 2.*self.kern.dK_dX(self.dL_dKmm,self.Z,)#factor of two becase of vertical and horizontal 'stripes' in dKmm_dZ dL_dZ = 2.*self.kern.dK_dX(self.dL_dKmm,self.Z)#factor of two becase of vertical and horizontal 'stripes' in dKmm_dZ
if self.has_uncertain_inputs: if self.has_uncertain_inputs:
dL_dZ += self.kern.dpsi1_dZ(self.dL_dpsi1.T,self.Z,self.X, self.X_uncertainty) dL_dZ += self.kern.dpsi1_dZ(self.dL_dpsi1,self.Z,self.X, self.X_uncertainty)
dL_dZ += self.kern.dpsi2_dZ(self.dL_dpsi2,self.Z,self.X, self.X_uncertainty) dL_dZ += 2.*self.kern.dpsi2_dZ(self.dL_dpsi2,self.Z,self.X, self.X_uncertainty) # 'stripes'
else: else:
#re-cast computations in psi2 back to psi1: #re-cast computations in psi2 back to psi1:
dL_dpsi1 = self.dL_dpsi1 + 2.*np.dot(self.dL_dpsi2_,self.beta.T*self.psi1)#dL_dpsi1 = self.dL_dpsi1 + 2.*np.dot(self.dL_dpsi2,self.psi1) dL_dpsi1 = self.dL_dpsi1 + 2.*np.dot(self.dL_dpsi2.sum(0),self.psi1)
dL_dZ += self.kern.dK_dX(dL_dpsi1,self.Z,self.X) dL_dZ += self.kern.dK_dX(dL_dpsi1,self.Z,self.X)
return dL_dZ return dL_dZ
def _log_likelihood_gradients(self):
if not self.EP:
return np.hstack([self.dL_dZ().flatten(), self.dL_dbeta(), self.dL_dtheta()])
else:
return np.hstack([self.dL_dZ().flatten(), self.dL_dtheta()])
def _raw_predict(self, Xnew, slices, full_cov=False): def _raw_predict(self, Xnew, slices, full_cov=False):
"""Internal helper function for making predictions, does not account for normalisation""" """Internal helper function for making predictions, does not account for normalisation"""
Kx = self.kern.K(self.Z, Xnew) Kx = self.kern.K(self.Z, Xnew)
mu = mdot(Kx.T, self.LBL_inv, self.psi1V) mu = mdot(Kx.T, self.C/self.scale_factor, self.psi1V)
phi = None
if full_cov: if full_cov:
Kxx = self.kern.K(Xnew) Kxx = self.kern.K(Xnew)
var = Kxx - mdot(Kx.T, (self.Kmmi - self.LBL_inv), Kx) var = Kxx - mdot(Kx.T, (self.Kmmi - self.C/self.scale_factor**2), Kx)
if not self.EP:
var += np.eye(Xnew.shape[0])/self.beta
else:
raise NotImplementedError, "full_cov = True not implemented for EP"
else: else:
Kxx = self.kern.Kdiag(Xnew) Kxx = self.kern.Kdiag(Xnew)
var = Kxx - np.sum(Kx*np.dot(self.Kmmi - self.LBL_inv, Kx),0) var = Kxx - np.sum(Kx*np.dot(self.Kmmi - self.C/self.scale_factor**2, Kx),0)
if not self.EP:
var += 1./self.beta return mu,var
else:
phi = self.likelihood.predictive_mean(mu,var)
return mu,var,phi
def plot(self, *args, **kwargs): def plot(self, *args, **kwargs):
""" """
Plot the fitted model: just call the GP_regression plot function and then add inducing inputs Plot the fitted model: just call the GP_regression plot function and then add inducing inputs
""" """
GP.plot(self,*args,**kwargs) GP_regression.plot(self,*args,**kwargs)
if self.Q==1: if self.Q==1:
pb.plot(self.Z,self.Z*0+pb.ylim()[0],'k|',mew=1.5,markersize=12) pb.plot(self.Z,self.Z*0+pb.ylim()[0],'k|',mew=1.5,markersize=12)
if self.has_uncertain_inputs: if self.has_uncertain_inputs:

258
GPy/models/sparse_GP_old.py Normal file
View file

@ -0,0 +1,258 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
import pylab as pb
from ..util.linalg import mdot, jitchol, chol_inv, pdinv
from ..util.plot import gpplot
from .. import kern
from GP import GP
#Still TODO:
# make use of slices properly (kernel can now do this)
# enable heteroscedatic noise (kernel will need to compute psi2 as a (NxMxM) array)
class sparse_GP(GP):
"""
Variational sparse GP model (Regression)
:param X: inputs
:type X: np.ndarray (N x Q)
:param Y: observed data
:type Y: np.ndarray of observations (N x D)
:param kernel : the kernel/covariance function. See link kernels
:type kernel: a GPy kernel
:param Z: inducing inputs (optional, see note)
:type Z: np.ndarray (M x Q) | None
:param X_uncertainty: The uncertainty in the measurements of X (Gaussian variance)
:type X_uncertainty: np.ndarray (N x Q) | None
:param Zslices: slices for the inducing inputs (see slicing TODO: link)
:param M : Number of inducing points (optional, default 10. Ignored if Z is not None)
:type M: int
:param beta: noise precision. TODO> ignore beta if doing EP
:type beta: float
:param normalize_(X|Y) : whether to normalize the data before computing (predictions will be in original scales)
:type normalize_(X|Y): bool
"""
def __init__(self,X,Y=None,kernel=None,X_uncertainty=None,beta=100.,Z=None,Zslices=None,M=10,normalize_X=False,normalize_Y=False,likelihood=None,method_ep='DTC',epsilon_ep=1e-3,power_ep=[1.,1.]):
if Z is None:
self.Z = np.random.permutation(X.copy())[:M]
self.M = M
else:
assert Z.shape[1]==X.shape[1]
self.Z = Z
self.M = Z.shape[0]
if X_uncertainty is None:
self.has_uncertain_inputs=False
else:
assert X_uncertainty.shape==X.shape
self.has_uncertain_inputs=True
self.X_uncertainty = X_uncertainty
GP.__init__(self, X=X, Y=Y, kernel=kernel, normalize_X=normalize_X, normalize_Y=normalize_Y,likelihood=likelihood,epsilon_ep=epsilon_ep,power_ep=power_ep)
#normalise X uncertainty also
if self.has_uncertain_inputs:
self.X_uncertainty /= np.square(self._Xstd)
if not self.EP:
self.trYYT = np.sum(np.square(self.Y))
else:
self.method_ep = method_ep
#normalise X uncertainty also
if self.has_uncertain_inputs:
self.X_uncertainty /= np.square(self._Xstd)
def _set_params(self, p):
self.Z = p[:self.M*self.Q].reshape(self.M, self.Q)
if not self.EP:
self.beta = p[self.M*self.Q]
self.kern._set_params(p[self.Z.size + 1:])
else:
self.kern._set_params(p[self.Z.size:])
if self.Y is None:
self.Y = np.ones([self.N,1])
self._compute_kernel_matrices()
self._computations()
def _get_params(self):
if not self.EP:
return np.hstack([self.Z.flatten(),self.beta,self.kern._get_params_transformed()])
else:
return np.hstack([self.Z.flatten(),self.kern._get_params_transformed()])
def _get_param_names(self):
if not self.EP:
return sum([['iip_%i_%i'%(i,j) for i in range(self.Z.shape[0])] for j in range(self.Z.shape[1])],[]) + ['noise_precision']+self.kern._get_param_names_transformed()
else:
return sum([['iip_%i_%i'%(i,j) for i in range(self.Z.shape[0])] for j in range(self.Z.shape[1])],[]) + self.kern._get_param_names_transformed()
def _compute_kernel_matrices(self):
# kernel computations, using BGPLVM notation
#TODO: slices for psi statistics (easy enough)
self.Kmm = self.kern.K(self.Z)
if self.has_uncertain_inputs:
if not self.EP:
self.psi0 = self.kern.psi0(self.Z,self.X, self.X_uncertainty)#.sum() NOTE psi0 is now a vector
self.psi1 = self.kern.psi1(self.Z,self.X, self.X_uncertainty).T
self.psi2 = self.kern.psi2(self.Z,self.X, self.X_uncertainty)
#self.psi2_beta_scaled = ?
else:
raise NotImplementedError, "uncertain_inputs not yet supported for EP"
else:
self.psi0 = self.kern.Kdiag(self.X,slices=self.Xslices)#.sum()
self.psi1 = self.kern.K(self.Z,self.X)
self.psi2 = np.dot(self.psi1,self.psi1.T)
self.psi2_beta_scaled = np.dot(self.psi1,self.beta*self.psi1.T)
def _computations(self):
# TODO find routine to multiply triangular matrices
self.V = self.beta*self.Y
self.psi1V = np.dot(self.psi1, self.V)
self.psi1VVpsi1 = np.dot(self.psi1V, self.psi1V.T)
self.Kmmi, self.Lm, self.Lmi, self.Kmm_logdet = pdinv(self.Kmm)
self.A = mdot(self.Lmi, self.psi2_beta_scaled, self.Lmi.T)
self.B = np.eye(self.M) + self.A
self.Bi, self.LB, self.LBi, self.B_logdet = pdinv(self.B)
self.LLambdai = np.dot(self.LBi, self.Lmi)
self.LBL_inv = mdot(self.Lmi.T, self.Bi, self.Lmi)
self.C = mdot(self.LLambdai, self.psi1V)
self.G = mdot(self.LBL_inv, self.psi1VVpsi1, self.LBL_inv.T)
self.trace_K_beta_scaled = (self.psi0*self.beta).sum() - np.trace(self.A)
if not self.EP:
self.trace_K = self.psi0.sum() - np.trace(self.A)/self.beta
# Compute dL_dpsi
self.dL_dpsi1 = mdot(self.LLambdai.T,self.C,self.V.T)
if not self.EP:
self.dL_dpsi0 = - 0.5 * self.D * self.beta * np.ones(self.N)
if self.has_uncertain_inputs:
self.dL_dpsi2 = - 0.5 * self.beta * (self.D*(self.LBL_inv - self.Kmmi) + self.G)
else:
self.dL_dpsi2_ = - 0.5 * (self.D*(self.LBL_inv - self.Kmmi) + self.G)
else:
self.dL_dpsi0 = - 0.5 * self.D * self.beta.flatten()
if not self.has_uncertain_inputs:
self.dL_dpsi2_ = - 0.5 * (self.D*(self.LBL_inv - self.Kmmi) + self.G)
# Compute dL_dKmm
self.dL_dKmm = -0.5 * self.D * mdot(self.Lmi.T, self.A, self.Lmi) # dB
self.dL_dKmm += -0.5 * self.D * (- self.LBL_inv - 2.*mdot(self.LBL_inv, self.psi2_beta_scaled, self.Kmmi) + self.Kmmi) # dC
self.dL_dKmm += np.dot(np.dot(self.G,self.psi2_beta_scaled) - np.dot(self.LBL_inv, self.psi1VVpsi1), self.Kmmi) + 0.5*self.G # dE
def approximate_likelihood(self):
assert not isinstance(self.likelihood, gaussian), "EP is only available for non-gaussian likelihoods"
if self.method_ep == 'DTC':
self.ep_approx = DTC(self.Kmm,self.likelihood,self.psi1,epsilon=self.epsilon_ep,power_ep=[self.eta,self.delta])
elif self.method_ep == 'FITC':
self.ep_approx = FITC(self.Kmm,self.likelihood,self.psi1,self.psi0,epsilon=self.epsilon_ep,power_ep=[self.eta,self.delta])
else:
self.ep_approx = Full(self.X,self.likelihood,self.kernel,inducing=None,epsilon=self.epsilon_ep,power_ep=[self.eta,self.delta])
self.beta, self.Y, self.Z_ep = self.ep_approx.fit_EP()
self.trbetaYYT = np.sum(np.square(self.Y)*self.beta)
self._computations()
def log_likelihood(self):
"""
Compute the (lower bound on the) log marginal likelihood
"""
if not self.EP:
A = -0.5*self.N*self.D*(np.log(2.*np.pi) - np.log(self.beta))
D = -0.5*self.beta*self.trYYT
else:
A = -0.5*self.D*(self.N*np.log(2.*np.pi) - np.sum(np.log(self.beta)))
D = -0.5*self.trbetaYYT
B = -0.5*self.D*self.trace_K_beta_scaled
C = -0.5*self.D * self.B_logdet
E = +0.5*np.sum(self.psi1VVpsi1 * self.LBL_inv)
return A+B+C+D+E
def dL_dbeta(self):
"""
Compute the gradient of the log likelihood wrt beta.
"""
#TODO: suport heteroscedatic noise
dA_dbeta = 0.5 * self.N*self.D/self.beta
dB_dbeta = - 0.5 * self.D * self.trace_K
dC_dbeta = - 0.5 * self.D * np.sum(self.Bi*self.A)/self.beta
dD_dbeta = - 0.5 * self.trYYT
tmp = mdot(self.LBi.T, self.LLambdai, self.psi1V)
dE_dbeta = (np.sum(np.square(self.C)) - 0.5 * np.sum(self.A * np.dot(tmp, tmp.T)))/self.beta
return np.squeeze(dA_dbeta + dB_dbeta + dC_dbeta + dD_dbeta + dE_dbeta)
def dL_dtheta(self):
"""
Compute and return the derivative of the log marginal likelihood wrt the parameters of the kernel
"""
dL_dtheta = self.kern.dK_dtheta(self.dL_dKmm,self.Z)
if self.has_uncertain_inputs:
dL_dtheta += self.kern.dpsi0_dtheta(self.dL_dpsi0, self.Z,self.X,self.X_uncertainty)
dL_dtheta += self.kern.dpsi1_dtheta(self.dL_dpsi1.T,self.Z,self.X, self.X_uncertainty)
dL_dtheta += self.kern.dpsi2_dtheta(self.dL_dpsi2,self.Z,self.X, self.X_uncertainty) # for multiple_beta, dL_dpsi2 will be a different shape
else:
#re-cast computations in psi2 back to psi1:
dL_dpsi1 = self.dL_dpsi1 + 2.*np.dot(self.dL_dpsi2_,self.beta.T*self.psi1) #dL_dpsi1 = self.dL_dpsi1 + 2.*np.dot(self.dL_dpsi2,self.psi1)
dL_dtheta += self.kern.dK_dtheta(dL_dpsi1,self.Z,self.X)
dL_dtheta += self.kern.dKdiag_dtheta(self.dL_dpsi0, self.X)
return dL_dtheta
def dL_dZ(self):
"""
The derivative of the bound wrt the inducing inputs Z
"""
dL_dZ = 2.*self.kern.dK_dX(self.dL_dKmm,self.Z,)#factor of two becase of vertical and horizontal 'stripes' in dKmm_dZ
if self.has_uncertain_inputs:
dL_dZ += self.kern.dpsi1_dZ(self.dL_dpsi1.T,self.Z,self.X, self.X_uncertainty)
dL_dZ += self.kern.dpsi2_dZ(self.dL_dpsi2,self.Z,self.X, self.X_uncertainty)
else:
#re-cast computations in psi2 back to psi1:
dL_dpsi1 = self.dL_dpsi1 + 2.*np.dot(self.dL_dpsi2_,self.beta.T*self.psi1)#dL_dpsi1 = self.dL_dpsi1 + 2.*np.dot(self.dL_dpsi2,self.psi1)
dL_dZ += self.kern.dK_dX(dL_dpsi1,self.Z,self.X)
return dL_dZ
def _log_likelihood_gradients(self):
if not self.EP:
return np.hstack([self.dL_dZ().flatten(), self.dL_dbeta(), self.dL_dtheta()])
else:
return np.hstack([self.dL_dZ().flatten(), self.dL_dtheta()])
def _raw_predict(self, Xnew, slices, full_cov=False):
"""Internal helper function for making predictions, does not account for normalisation"""
Kx = self.kern.K(self.Z, Xnew)
mu = mdot(Kx.T, self.LBL_inv, self.psi1V)
phi = None
if full_cov:
Kxx = self.kern.K(Xnew)
var = Kxx - mdot(Kx.T, (self.Kmmi - self.LBL_inv), Kx)
if not self.EP:
var += np.eye(Xnew.shape[0])/self.beta
else:
raise NotImplementedError, "full_cov = True not implemented for EP"
else:
Kxx = self.kern.Kdiag(Xnew)
var = Kxx - np.sum(Kx*np.dot(self.Kmmi - self.LBL_inv, Kx),0)
if not self.EP:
var += 1./self.beta
else:
phi = self.likelihood.predictive_mean(mu,var)
return mu,var,phi
def plot(self, *args, **kwargs):
"""
Plot the fitted model: just call the GP_regression plot function and then add inducing inputs
"""
GP.plot(self,*args,**kwargs)
if self.Q==1:
pb.plot(self.Z,self.Z*0+pb.ylim()[0],'k|',mew=1.5,markersize=12)
if self.has_uncertain_inputs:
pb.errorbar(self.X[:,0], pb.ylim()[0]+np.zeros(self.N), xerr=2*np.sqrt(self.X_uncertainty.flatten()))
if self.Q==2:
pb.plot(self.Z[:,0],self.Z[:,1],'wo')

View file

@ -1,205 +0,0 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
import pylab as pb
from ..util.linalg import mdot, jitchol, chol_inv, pdinv
from ..util.plot import gpplot
from .. import kern
from ..inference.likelihoods import likelihood
from GP_regression import GP_regression
#Still TODO:
# make use of slices properly (kernel can now do this)
# enable heteroscedatic noise (kernel will need to compute psi2 as a (NxMxM) array)
class sparse_GP_regression(GP_regression):
"""
Variational sparse GP model (Regression)
:param X: inputs
:type X: np.ndarray (N x Q)
:param Y: observed data
:type Y: np.ndarray of observations (N x D)
:param kernel : the kernel/covariance function. See link kernels
:type kernel: a GPy kernel
:param Z: inducing inputs (optional, see note)
:type Z: np.ndarray (M x Q) | None
:param X_uncertainty: The uncertainty in the measurements of X (Gaussian variance)
:type X_uncertainty: np.ndarray (N x Q) | None
:param Zslices: slices for the inducing inputs (see slicing TODO: link)
:param M : Number of inducing points (optional, default 10. Ignored if Z is not None)
:type M: int
:param beta: noise precision. TODO> ignore beta if doing EP
:type beta: float
:param normalize_(X|Y) : whether to normalize the data before computing (predictions will be in original scales)
:type normalize_(X|Y): bool
"""
def __init__(self,X,Y,kernel=None, X_uncertainty=None, beta=100., Z=None,Zslices=None,M=10,normalize_X=False,normalize_Y=False):
self.scale_factor = 1000.0
self.beta = beta
if Z is None:
self.Z = np.random.permutation(X.copy())[:M]
self.M = M
else:
assert Z.shape[1]==X.shape[1]
self.Z = Z
self.M = Z.shape[0]
if X_uncertainty is None:
self.has_uncertain_inputs=False
else:
assert X_uncertainty.shape==X.shape
self.has_uncertain_inputs=True
self.X_uncertainty = X_uncertainty
GP_regression.__init__(self, X, Y, kernel=kernel, normalize_X=normalize_X, normalize_Y=normalize_Y)
self.trYYT = np.sum(np.square(self.Y))
#normalise X uncertainty also
if self.has_uncertain_inputs:
self.X_uncertainty /= np.square(self._Xstd)
def _computations(self):
# TODO find routine to multiply triangular matrices
#TODO: slices for psi statistics (easy enough)
# kernel computations, using BGPLVM notation
self.Kmm = self.kern.K(self.Z)
if self.has_uncertain_inputs:
self.psi0 = self.kern.psi0(self.Z,self.X, self.X_uncertainty).sum()
self.psi1 = self.kern.psi1(self.Z,self.X, self.X_uncertainty).T
self.psi2 = self.kern.psi2(self.Z,self.X, self.X_uncertainty)
self.psi2_beta_scaled = (self.psi2*(self.beta/self.scale_factor**2)).sum(0)
else:
self.psi0 = self.kern.Kdiag(self.X,slices=self.Xslices).sum()
self.psi1 = self.kern.K(self.Z,self.X)
#self.psi2 = np.dot(self.psi1,self.psi1.T)
#self.psi2 = self.psi1.T[:,:,None]*self.psi1.T[:,None,:]
tmp = self.psi1/(self.scale_factor/np.sqrt(self.beta))
self.psi2_beta_scaled = np.dot(tmp,tmp.T)
sf = self.scale_factor
sf2 = sf**2
self.Kmmi, self.Lm, self.Lmi, self.Kmm_logdet = pdinv(self.Kmm)#+np.eye(self.M)*1e-3)
self.V = (self.beta/self.scale_factor)*self.Y
self.A = mdot(self.Lmi, self.psi2_beta_scaled, self.Lmi.T)
self.B = np.eye(self.M)/sf2 + self.A
self.Bi, self.LB, self.LBi, self.B_logdet = pdinv(self.B)
self.psi1V = np.dot(self.psi1, self.V)
self.psi1VVpsi1 = np.dot(self.psi1V, self.psi1V.T)
self.C = mdot(self.Lmi.T, self.Bi, self.Lmi)
self.E = mdot(self.C, self.psi1VVpsi1/sf2, self.C.T)
# Compute dL_dpsi
self.dL_dpsi0 = - 0.5 * self.D * self.beta * np.ones(self.N)
self.dL_dpsi1 = mdot(self.V, self.psi1V.T,self.C).T
self.dL_dpsi2 = 0.5 * self.beta * self.D * self.Kmmi[None,:,:] # dB
self.dL_dpsi2 += - 0.5 * self.beta/sf2 * self.D * self.C[None,:,:] # dC
self.dL_dpsi2 += - 0.5 * self.beta * self.E[None,:,:] # dD
# Compute dL_dKmm
self.dL_dKmm = -0.5 * self.D * mdot(self.Lmi.T, self.A, self.Lmi)*sf2 # dB
self.dL_dKmm += -0.5 * self.D * (- self.C/sf2 - 2.*mdot(self.C, self.psi2_beta_scaled, self.Kmmi) + self.Kmmi) # dC
self.dL_dKmm += np.dot(np.dot(self.E*sf2, self.psi2_beta_scaled) - np.dot(self.C, self.psi1VVpsi1), self.Kmmi) + 0.5*self.E # dD
def _set_params(self, p):
self.Z = p[:self.M*self.Q].reshape(self.M, self.Q)
self.beta = p[self.M*self.Q]
self.kern._set_params(p[self.Z.size + 1:])
self._computations()
def _get_params(self):
return np.hstack([self.Z.flatten(),self.beta,self.kern._get_params_transformed()])
def _get_param_names(self):
return sum([['iip_%i_%i'%(i,j) for i in range(self.Z.shape[0])] for j in range(self.Z.shape[1])],[]) + ['noise_precision']+self.kern._get_param_names_transformed()
def log_likelihood(self):
""" Compute the (lower bound on the) log marginal likelihood """
sf2 = self.scale_factor**2
A = -0.5*self.N*self.D*(np.log(2.*np.pi) - np.log(self.beta)) -0.5*self.beta*self.trYYT
B = -0.5*self.D*(self.beta*self.psi0-np.trace(self.A)*sf2)
C = -0.5*self.D * (self.B_logdet + self.M*np.log(sf2))
D = +0.5*np.sum(self.psi1VVpsi1 * self.C)
return A+B+C+D
def _log_likelihood_gradients(self):
return np.hstack([self.dL_dZ().flatten(), self.dL_dbeta(), self.dL_dtheta()])
def dL_dbeta(self):
"""
Compute the gradient of the log likelihood wrt beta.
"""
#TODO: suport heteroscedatic noise
sf2 = self.scale_factor**2
dA_dbeta = 0.5 * self.N*self.D/self.beta - 0.5 * self.trYYT
dB_dbeta = - 0.5 * self.D * (self.psi0 - np.trace(self.A)/self.beta*sf2)
dC_dbeta = - 0.5 * self.D * np.sum(self.Bi*self.A)/self.beta
dD_dbeta = np.sum((self.C - 0.5 * mdot(self.C,self.psi2_beta_scaled,self.C) ) * self.psi1VVpsi1 )/self.beta
return np.squeeze(dA_dbeta + dB_dbeta + dC_dbeta + dD_dbeta)
def dL_dtheta(self):
"""
Compute and return the derivative of the log marginal likelihood wrt the parameters of the kernel
"""
dL_dtheta = self.kern.dK_dtheta(self.dL_dKmm,self.Z)
if self.has_uncertain_inputs:
dL_dtheta += self.kern.dpsi0_dtheta(self.dL_dpsi0, self.Z,self.X,self.X_uncertainty)
dL_dtheta += self.kern.dpsi1_dtheta(self.dL_dpsi1.T,self.Z,self.X, self.X_uncertainty)
dL_dtheta += self.kern.dpsi2_dtheta(self.dL_dpsi2,self.dL_dpsi1.T, self.Z,self.X, self.X_uncertainty) # for multiple_beta, dL_dpsi2 will be a different shape
else:
#re-cast computations in psi2 back to psi1:
dL_dpsi1 = self.dL_dpsi1 + 2.*np.dot(self.dL_dpsi2.sum(0),self.psi1)
dL_dtheta += self.kern.dK_dtheta(dL_dpsi1,self.Z,self.X)
dL_dtheta += self.kern.dKdiag_dtheta(self.dL_dpsi0, self.X)
return dL_dtheta
def dL_dZ(self):
"""
The derivative of the bound wrt the inducing inputs Z
"""
dL_dZ = 2.*self.kern.dK_dX(self.dL_dKmm,self.Z)#factor of two becase of vertical and horizontal 'stripes' in dKmm_dZ
if self.has_uncertain_inputs:
dL_dZ += self.kern.dpsi1_dZ(self.dL_dpsi1,self.Z,self.X, self.X_uncertainty)
dL_dZ += 2.*self.kern.dpsi2_dZ(self.dL_dpsi2,self.Z,self.X, self.X_uncertainty) # 'stripes'
else:
#re-cast computations in psi2 back to psi1:
dL_dpsi1 = self.dL_dpsi1 + 2.*np.dot(self.dL_dpsi2.sum(0),self.psi1)
dL_dZ += self.kern.dK_dX(dL_dpsi1,self.Z,self.X)
return dL_dZ
def _raw_predict(self, Xnew, slices, full_cov=False):
"""Internal helper function for making predictions, does not account for normalisation"""
Kx = self.kern.K(self.Z, Xnew)
mu = mdot(Kx.T, self.C/self.scale_factor, self.psi1V)
if full_cov:
Kxx = self.kern.K(Xnew)
var = Kxx - mdot(Kx.T, (self.Kmmi - self.C/self.scale_factor**2), Kx) + np.eye(Xnew.shape[0])/self.beta # TODO: This beta doesn't belong here in the EP case.
else:
Kxx = self.kern.Kdiag(Xnew)
var = Kxx - np.sum(Kx*np.dot(self.Kmmi - self.C/self.scale_factor**2, Kx),0) + 1./self.beta # TODO: This beta doesn't belong here in the EP case.
return mu,var
def plot(self, *args, **kwargs):
"""
Plot the fitted model: just call the GP_regression plot function and then add inducing inputs
"""
GP_regression.plot(self,*args,**kwargs)
if self.Q==1:
pb.plot(self.Z,self.Z*0+pb.ylim()[0],'k|',mew=1.5,markersize=12)
if self.has_uncertain_inputs:
pb.errorbar(self.X[:,0], pb.ylim()[0]+np.zeros(self.N), xerr=2*np.sqrt(self.X_uncertainty.flatten()))
if self.Q==2:
pb.plot(self.Z[:,0],self.Z[:,1],'wo')