mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-06-08 15:05:15 +02:00
removed uncollapsed sparse GP. superceeded by the forthcoming svigp package
This commit is contained in:
parent
6dced9ac39
commit
23bde6f3dd
1 changed files with 0 additions and 151 deletions
|
|
@ -1,151 +0,0 @@
|
|||
# Copyright (c) 2012 James Hensman
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
import numpy as np
|
||||
import pylab as pb
|
||||
from ..util.linalg import mdot, jitchol, chol_inv, pdinv
|
||||
from .. import kern
|
||||
from ..likelihoods import likelihood
|
||||
from sparse_GP import sparse_GP
|
||||
|
||||
class uncollapsed_sparse_GP(sparse_GP):
|
||||
"""
|
||||
Variational sparse GP model (Regression), where the approximating distribution q(u) is represented explicitly
|
||||
|
||||
:param X: inputs
|
||||
:type X: np.ndarray (N x Q)
|
||||
:param likelihood: GPy likelihood class, containing observed data
|
||||
:param q_u: canonical parameters of the distribution squasehd into a 1D array
|
||||
:type q_u: np.ndarray
|
||||
:param kernel : the kernel/covariance function. See link kernels
|
||||
:type kernel: a GPy kernel
|
||||
:param Z: inducing inputs (optional, see note)
|
||||
:type Z: np.ndarray (M x Q) | None
|
||||
:param Zslices: slices for the inducing inputs (see slicing TODO: link)
|
||||
:param normalize_X : whether to normalize the data before computing (predictions will be in original scales)
|
||||
:type normalize_X: bool
|
||||
"""
|
||||
|
||||
def __init__(self, X, likelihood, kernel, Z, q_u=None, **kwargs):
|
||||
self.M = Z.shape[0]
|
||||
if q_u is None:
|
||||
q_u = np.hstack((np.random.randn(self.M*likelihood.D),-0.5*np.eye(self.M).flatten()))
|
||||
self.likelihood = likelihood
|
||||
self.set_vb_param(q_u)
|
||||
sparse_GP.__init__(self, X, likelihood, kernel, Z, **kwargs)
|
||||
|
||||
def _computations(self):
|
||||
# kernel computations, using BGPLVM notation
|
||||
self.Kmm = self.kern.K(self.Z)
|
||||
if self.has_uncertain_inputs:
|
||||
raise NotImplementedError
|
||||
else:
|
||||
self.psi0 = self.kern.Kdiag(self.X,slices=self.Xslices)
|
||||
self.psi1 = self.kern.K(self.Z,self.X)
|
||||
if self.likelihood.is_heteroscedastic:
|
||||
raise NotImplementedError
|
||||
else:
|
||||
tmp = self.psi1*(np.sqrt(self.likelihood.precision)/sf)
|
||||
self.psi2_beta_scaled = np.dot(tmp,tmp.T)
|
||||
self.psi2 = self.psi1.T[:,:,None]*self.psi1.T[:,None,:]
|
||||
|
||||
|
||||
self.V = self.likelihood.precision*self.Y
|
||||
self.VmT = np.dot(self.V,self.q_u_expectation[0].T)
|
||||
self.psi1V = np.dot(self.psi1, self.V)
|
||||
self.psi1VVpsi1 = np.dot(self.psi1V, self.psi1V.T)
|
||||
self.Kmmi, self.Lm, self.Lmi, self.Kmm_logdet = pdinv(self.Kmm)
|
||||
self.A = mdot(self.Lmi, self.beta*self.psi2, self.Lmi.T)
|
||||
self.B = np.eye(self.M) + self.A
|
||||
self.Lambda = mdot(self.Lmi.T,self.B,self.Lmi)
|
||||
self.trace_K = self.psi0 - np.trace(self.A)/self.beta
|
||||
self.projected_mean = mdot(self.psi1.T,self.Kmmi,self.q_u_expectation[0])
|
||||
|
||||
# Compute dL_dpsi
|
||||
self.dL_dpsi0 = - 0.5 * self.likelihood.D * self.beta * np.ones(self.N)
|
||||
self.dL_dpsi1 = np.dot(self.VmT,self.Kmmi).T # This is the correct term for E I think...
|
||||
self.dL_dpsi2 = 0.5 * self.beta * self.likelihood.D * (self.Kmmi - mdot(self.Kmmi,self.q_u_expectation[1],self.Kmmi))
|
||||
|
||||
# Compute dL_dKmm
|
||||
tmp = self.beta*mdot(self.psi2,self.Kmmi,self.q_u_expectation[1]) -np.dot(self.q_u_expectation[0],self.psi1V.T)
|
||||
tmp += tmp.T
|
||||
tmp += self.likelihood.D*(-self.beta*self.psi2 - self.Kmm + self.q_u_expectation[1])
|
||||
self.dL_dKmm = 0.5*mdot(self.Kmmi,tmp,self.Kmmi)
|
||||
|
||||
#Compute the gradient of the log likelihood wrt noise variance
|
||||
#TODO: suport heteroscedatic noise
|
||||
dbeta = 0.5 * self.N*self.likelihood.D/self.beta
|
||||
dbeta += - 0.5 * self.likelihood.D * self.trace_K
|
||||
dbeta += - 0.5 * self.likelihood.D * np.sum(self.q_u_expectation[1]*mdot(self.Kmmi,self.psi2,self.Kmmi))
|
||||
dbeta += - 0.5 * self.trYYT
|
||||
dbeta += np.sum(np.dot(self.Y.T,self.projected_mean))
|
||||
self.partial_for_likelihood = -dbeta*self.likelihood.precision**2
|
||||
|
||||
def log_likelihood(self):
|
||||
"""
|
||||
Compute the (lower bound on the) log marginal likelihood
|
||||
"""
|
||||
A = -0.5*self.N*self.likelihood.D*(np.log(2.*np.pi) - np.log(self.beta))
|
||||
B = -0.5*self.beta*self.likelihood.D*self.trace_K
|
||||
C = -0.5*self.likelihood.D *(self.Kmm_logdet-self.q_u_logdet + np.sum(self.Lambda * self.q_u_expectation[1]) - self.M)
|
||||
D = -0.5*self.beta*self.trYYT
|
||||
E = np.sum(np.dot(self.V.T,self.projected_mean))
|
||||
return A+B+C+D+E
|
||||
|
||||
def _raw_predict(self, Xnew, slices,full_cov=False):
|
||||
"""Internal helper function for making predictions, does not account for normalization"""
|
||||
Kx = self.kern.K(Xnew,self.Z)
|
||||
mu = mdot(Kx,self.Kmmi,self.q_u_expectation[0])
|
||||
|
||||
tmp = self.Kmmi- mdot(self.Kmmi,self.q_u_cov,self.Kmmi)
|
||||
if full_cov:
|
||||
Kxx = self.kern.K(Xnew)
|
||||
var = Kxx - mdot(Kx,tmp,Kx.T)
|
||||
else:
|
||||
Kxx = self.kern.Kdiag(Xnew)
|
||||
var = (Kxx - np.sum(Kx*np.dot(Kx,tmp),1))[:,None]
|
||||
return mu,var
|
||||
|
||||
|
||||
def set_vb_param(self,vb_param):
|
||||
"""set the distribution q(u) from the canonical parameters"""
|
||||
self.q_u_prec = -2.*vb_param[-self.M**2:].reshape(self.M, self.M)
|
||||
self.q_u_cov, q_u_Li, q_u_L, tmp = pdinv(self.q_u_prec)
|
||||
self.q_u_logdet = -tmp
|
||||
self.q_u_mean = np.dot(self.q_u_cov,vb_param[:self.M*self.likelihood.D].reshape(self.M,self.likelihood.D))
|
||||
|
||||
self.q_u_expectation = (self.q_u_mean, np.dot(self.q_u_mean,self.q_u_mean.T)+self.q_u_cov*self.likelihood.D)
|
||||
|
||||
self.q_u_canonical = (np.dot(self.q_u_prec, self.q_u_mean),-0.5*self.q_u_prec)
|
||||
#TODO: computations now?
|
||||
|
||||
def get_vb_param(self):
|
||||
"""
|
||||
Return the canonical parameters of the distribution q(u)
|
||||
"""
|
||||
return np.hstack([e.flatten() for e in self.q_u_canonical])
|
||||
|
||||
def vb_grad_natgrad(self):
|
||||
"""
|
||||
Compute the gradients of the lower bound wrt the canonical and
|
||||
Expectation parameters of u.
|
||||
|
||||
Note that the natural gradient in either is given by the gradient in the other (See Hensman et al 2012 Fast Variational inference in the conjugate exponential Family)
|
||||
"""
|
||||
dL_dmmT_S = -0.5*self.Lambda-self.q_u_canonical[1]
|
||||
dL_dm = np.dot(self.Kmmi,self.psi1V) - np.dot(self.Lambda,self.q_u_mean)
|
||||
|
||||
#dL_dSim =
|
||||
#dL_dmhSi =
|
||||
|
||||
return np.hstack((dL_dm.flatten(),dL_dmmT_S.flatten())) # natgrad only, grad TODO
|
||||
|
||||
|
||||
def plot(self, *args, **kwargs):
|
||||
"""
|
||||
add the distribution q(u) to the plot from sparse_GP
|
||||
"""
|
||||
sparse_GP.plot(self,*args,**kwargs)
|
||||
if self.Q==1:
|
||||
pb.errorbar(self.Z[:,0],self.q_u_expectation[0][:,0],yerr=2.*np.sqrt(np.diag(self.q_u_cov)),fmt=None,ecolor='b')
|
||||
|
||||
Loading…
Add table
Add a link
Reference in a new issue