Merge pull request #562 from SheffieldML/external-mo

Release the implementation of LVMOGP
Zhenwen Dai 2017-10-11 21:55:53 +01:00 committed by GitHub
commit 8af7dedf4b
9 changed files with 1299 additions and 8 deletions
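
For context, this PR adds the LVMOGP inference methods (VarDTC_MD, VarDTC_SVI_Multiout, VarDTC_SVI_Multiout_Miss) and models (GPMultioutRegression, GPMultioutRegressionMD, SparseGPRegressionMD). A minimal usage sketch, following the unit tests added below (data and sizes are illustrative):

import numpy as np
import GPy

# Fully observed setting: N inputs, D outputs, Qr-dimensional latent space of outputs.
N, D, Qr = 20, 4, 2
X = np.random.rand(N, 1)
Y = np.random.randn(N, D)
m = GPy.models.GPMultioutRegression(X, Y, Xr_dim=Qr,
                                    kernel_row=GPy.kern.RBF(Qr, ARD=True),
                                    num_inducing=(5, 3), init='GP')
m.optimize_auto(max_iters=100)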

View file

@@ -91,6 +91,8 @@ from .pep import PEP
from .var_dtc_parallel import VarDTC_minibatch
from .var_gauss import VarGauss
from .gaussian_grid_inference import GaussianGridInference
from .vardtc_svi_multiout import VarDTC_SVI_Multiout
from .vardtc_svi_multiout_miss import VarDTC_SVI_Multiout_Miss
# class FullLatentFunctionData(object):

View file

@@ -0,0 +1,177 @@
from GPy.util.linalg import jitchol, backsub_both_sides, tdot, dtrtrs, dtrtri, pdinv, dpotri
from GPy.util import diag
from GPy.core.parameterization.variational import VariationalPosterior
import numpy as np
from GPy.inference.latent_function_inference import LatentFunctionInference
from GPy.inference.latent_function_inference.posterior import Posterior
log_2_pi = np.log(2*np.pi)
class VarDTC_MD(LatentFunctionInference):
"""
An object for inference when the likelihood is Gaussian, but we want to do sparse inference.
The function self.inference returns a Posterior object, which summarizes
the posterior.
For efficiency, we sometimes work with the cholesky of Y*Y.T. To save repeatedly recomputing this, we cache it.
"""
const_jitter = 1e-6
def gatherPsiStat(self, kern, X, Z, Y, beta, uncertain_inputs):
if uncertain_inputs:
psi0 = kern.psi0(Z, X)
psi1 = kern.psi1(Z, X)
psi2 = kern.psi2n(Z, X)
else:
psi0 = kern.Kdiag(X)
psi1 = kern.K(X, Z)
psi2 = psi1[:,:,None]*psi1[:,None,:]
return psi0, psi1, psi2
def inference(self, kern, X, Z, likelihood, Y, indexD, output_dim, Y_metadata=None, Lm=None, dL_dKmm=None, Kuu_sigma=None):
"""
The first phase of inference:
Compute: log-likelihood, dL_dKmm
Cached intermediate results: Kmm, KmmInv,
"""
        num_inducing = Z.shape[0]  # number of inducing points
uncertain_inputs = isinstance(X, VariationalPosterior)
beta = 1./likelihood.variance
if len(beta)==1:
beta = np.zeros(output_dim)+beta
beta_exp = np.zeros(indexD.shape[0])
for d in range(output_dim):
beta_exp[indexD==d] = beta[d]
psi0, psi1, psi2 = self.gatherPsiStat(kern, X, Z, Y, beta, uncertain_inputs)
psi2_sum = (beta_exp[:,None,None]*psi2).sum(0)/output_dim
#======================================================================
# Compute Common Components
#======================================================================
Kmm = kern.K(Z).copy()
if Kuu_sigma is not None:
diag.add(Kmm, Kuu_sigma)
else:
diag.add(Kmm, self.const_jitter)
Lm = jitchol(Kmm)
logL = 0.
dL_dthetaL = np.zeros(output_dim)
dL_dKmm = np.zeros_like(Kmm)
dL_dpsi0 = np.zeros_like(psi0)
dL_dpsi1 = np.zeros_like(psi1)
dL_dpsi2 = np.zeros_like(psi2)
wv = np.empty((Kmm.shape[0],output_dim))
for d in range(output_dim):
idx_d = indexD==d
Y_d = Y[idx_d]
N_d = Y_d.shape[0]
beta_d = beta[d]
psi2_d = psi2[idx_d].sum(0)*beta_d
psi1Y = Y_d.T.dot(psi1[idx_d])*beta_d
psi0_d = psi0[idx_d].sum()*beta_d
YRY_d = np.square(Y_d).sum()*beta_d
LmInvPsi2LmInvT = backsub_both_sides(Lm, psi2_d, 'right')
Lambda = np.eye(Kmm.shape[0])+LmInvPsi2LmInvT
LL = jitchol(Lambda)
LmLL = Lm.dot(LL)
b = dtrtrs(LmLL, psi1Y.T)[0].T
bbt = np.square(b).sum()
v = dtrtrs(LmLL, b.T, trans=1)[0].T
LLinvPsi1TYYTPsi1LLinvT = tdot(b.T)
tmp = -backsub_both_sides(LL, LLinvPsi1TYYTPsi1LLinvT)
            dL_dpsi2R = backsub_both_sides(Lm, tmp+np.eye(num_inducing))/2
logL_R = -N_d*np.log(beta_d)
logL += -((N_d*log_2_pi+logL_R+psi0_d-np.trace(LmInvPsi2LmInvT))+YRY_d- bbt)/2.
dL_dKmm += dL_dpsi2R - backsub_both_sides(Lm, LmInvPsi2LmInvT)/2
dL_dthetaL[d:d+1] = (YRY_d*beta_d + beta_d*psi0_d - N_d*beta_d)/2. - beta_d*(dL_dpsi2R*psi2_d).sum() - beta_d*np.trace(LLinvPsi1TYYTPsi1LLinvT)
dL_dpsi0[idx_d] = -beta_d/2.
dL_dpsi1[idx_d] = beta_d*np.dot(Y_d,v)
dL_dpsi2[idx_d] = beta_d*dL_dpsi2R
wv[:,d] = v
LmInvPsi2LmInvT = backsub_both_sides(Lm, psi2_sum, 'right')
Lambda = np.eye(Kmm.shape[0])+LmInvPsi2LmInvT
LL = jitchol(Lambda)
LmLL = Lm.dot(LL)
logdet_L = 2.*np.sum(np.log(np.diag(LL)))
dL_dpsi2R_common = dpotri(LmLL)[0]/-2.
dL_dpsi2 += dL_dpsi2R_common[None,:,:]*beta_exp[:,None,None]
for d in range(output_dim):
dL_dthetaL[d] += (dL_dpsi2R_common*psi2[indexD==d].sum(0)).sum()*-beta[d]*beta[d]
dL_dKmm += dL_dpsi2R_common*output_dim
logL += -output_dim*logdet_L/2.
#======================================================================
# Compute dL_dKmm
#======================================================================
# dL_dKmm = dL_dpsi2R - output_dim* backsub_both_sides(Lm, LmInvPsi2LmInvT)/2 #LmInv.T.dot(LmInvPsi2LmInvT).dot(LmInv)/2.
#======================================================================
# Compute the Posterior distribution of inducing points p(u|Y)
#======================================================================
LLInvLmT = dtrtrs(LL, Lm.T)[0]
cov = tdot(LLInvLmT.T)
        wd_inv = backsub_both_sides(Lm, np.eye(num_inducing) - backsub_both_sides(LL, np.identity(num_inducing), transpose='left'), transpose='left')
post = Posterior(woodbury_inv=wd_inv, woodbury_vector=wv, K=Kmm, mean=None, cov=cov, K_chol=Lm)
#======================================================================
        # Compute dL_dthetaL for uncertain inputs and non-heteroscedastic noise
#======================================================================
# for d in range(output_dim):
# dL_dthetaL[d:d+1] += - beta[d]*beta[d]*(dL_dpsi2R[None,:,:] * psi2[indexD==d]/output_dim).sum()
# dL_dthetaL += - (dL_dpsi2R[None,:,:] * psi2_sum*D beta*(dL_dpsi2R*psi2).sum()
#======================================================================
# Compute dL_dpsi
#======================================================================
if not uncertain_inputs:
dL_dpsi1 += (psi1[:,None,:]*dL_dpsi2).sum(2)*2.
if uncertain_inputs:
grad_dict = {'dL_dKmm': dL_dKmm,
'dL_dpsi0':dL_dpsi0,
'dL_dpsi1':dL_dpsi1,
'dL_dpsi2':dL_dpsi2,
'dL_dthetaL':dL_dthetaL}
else:
grad_dict = {'dL_dKmm': dL_dKmm,
'dL_dKdiag':dL_dpsi0,
'dL_dKnm':dL_dpsi1,
'dL_dthetaL':dL_dthetaL}
return post, logL, grad_dict
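
The indexD argument routes each row of Y to one output, and the per-output noise precisions are broadcast to the data points accordingly. A small sketch of that bookkeeping, equivalent to the loop over d above:

import numpy as np

indexD = np.array([0, 0, 1, 2, 2, 2])   # output index of each data row
output_dim = int(indexD.max()) + 1
beta = np.full(output_dim, 1.0/0.3)     # one noise precision per output
beta_exp = beta[indexD]                 # per-row precision, as beta_exp is built above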

View file

@@ -0,0 +1,271 @@
from GPy.util.linalg import jitchol, backsub_both_sides, tdot, dtrtrs, dtrtri, pdinv, dpotri
from GPy.util import diag
from GPy.core.parameterization.variational import VariationalPosterior
import numpy as np
from GPy.inference.latent_function_inference import LatentFunctionInference
from GPy.inference.latent_function_inference.posterior import Posterior
log_2_pi = np.log(2*np.pi)
class VarDTC_SVI_Multiout(LatentFunctionInference):
"""
An object for inference when the likelihood is Gaussian, but we want to do sparse inference.
The function self.inference returns a Posterior object, which summarizes
the posterior.
For efficiency, we sometimes work with the cholesky of Y*Y.T. To save repeatedly recomputing this, we cache it.
"""
const_jitter = 1e-6
def get_trYYT(self, Y):
return np.sum(np.square(Y))
def get_YYTfactor(self, Y):
N, D = Y.shape
if (N>=D):
return Y.view(np.ndarray)
else:
return jitchol(tdot(Y))
def gatherPsiStat(self, kern, X, Z, uncertain_inputs):
if uncertain_inputs:
psi0 = kern.psi0(Z, X).sum()
psi1 = kern.psi1(Z, X)
psi2 = kern.psi2(Z, X)
else:
psi0 = kern.Kdiag(X).sum()
psi1 = kern.K(X, Z)
psi2 = tdot(psi1.T)
return psi0, psi1, psi2
    def inference(self, kern_r, kern_c, Xr, Xc, Zr, Zc, likelihood, Y, qU_mean, qU_var_r, qU_var_c):
"""
The SVI-VarDTC inference
"""
N, D, Mr, Mc, Qr, Qc = Y.shape[0], Y.shape[1], Zr.shape[0], Zc.shape[0], Zr.shape[1], Zc.shape[1]
uncertain_inputs_r = isinstance(Xr, VariationalPosterior)
uncertain_inputs_c = isinstance(Xc, VariationalPosterior)
uncertain_outputs = isinstance(Y, VariationalPosterior)
beta = 1./likelihood.variance
psi0_r, psi1_r, psi2_r = self.gatherPsiStat(kern_r, Xr, Zr, uncertain_inputs_r)
psi0_c, psi1_c, psi2_c = self.gatherPsiStat(kern_c, Xc, Zc, uncertain_inputs_c)
#======================================================================
# Compute Common Components
#======================================================================
Kuu_r = kern_r.K(Zr).copy()
diag.add(Kuu_r, self.const_jitter)
Lr = jitchol(Kuu_r)
Kuu_c = kern_c.K(Zc).copy()
diag.add(Kuu_c, self.const_jitter)
Lc = jitchol(Kuu_c)
mu, Sr, Sc = qU_mean, qU_var_r, qU_var_c
LSr = jitchol(Sr)
LSc = jitchol(Sc)
LcInvMLrInvT = dtrtrs(Lc,dtrtrs(Lr,mu.T)[0].T)[0]
LcInvPsi2_cLcInvT = backsub_both_sides(Lc, psi2_c,'right')
LrInvPsi2_rLrInvT = backsub_both_sides(Lr, psi2_r,'right')
LcInvLSc = dtrtrs(Lc, LSc)[0]
LrInvLSr = dtrtrs(Lr, LSr)[0]
LcInvScLcInvT = tdot(LcInvLSc)
LrInvSrLrInvT = tdot(LrInvLSr)
LcInvPsi1_cT = dtrtrs(Lc, psi1_c.T)[0]
LrInvPsi1_rT = dtrtrs(Lr, psi1_r.T)[0]
tr_LrInvPsi2_rLrInvT_LrInvSrLrInvT = (LrInvPsi2_rLrInvT*LrInvSrLrInvT).sum()
tr_LcInvPsi2_cLcInvT_LcInvScLcInvT = (LcInvPsi2_cLcInvT*LcInvScLcInvT).sum()
tr_LrInvSrLrInvT = np.square(LrInvLSr).sum()
tr_LcInvScLcInvT = np.square(LcInvLSc).sum()
tr_LrInvPsi2_rLrInvT = np.trace(LrInvPsi2_rLrInvT)
tr_LcInvPsi2_cLcInvT = np.trace(LcInvPsi2_cLcInvT)
#======================================================================
# Compute log-likelihood
#======================================================================
logL_A = - np.square(Y).sum() \
- (LcInvMLrInvT.T.dot(LcInvPsi2_cLcInvT).dot(LcInvMLrInvT)*LrInvPsi2_rLrInvT).sum() \
- tr_LrInvPsi2_rLrInvT_LrInvSrLrInvT* tr_LcInvPsi2_cLcInvT_LcInvScLcInvT \
+ 2 * (Y * LcInvPsi1_cT.T.dot(LcInvMLrInvT).dot(LrInvPsi1_rT)).sum() - psi0_c * psi0_r \
+ tr_LrInvPsi2_rLrInvT * tr_LcInvPsi2_cLcInvT
logL = -N*D/2.*(np.log(2.*np.pi)-np.log(beta)) + beta/2.* logL_A \
-Mc * (np.log(np.diag(Lr)).sum()-np.log(np.diag(LSr)).sum()) -Mr * (np.log(np.diag(Lc)).sum()-np.log(np.diag(LSc)).sum()) \
- np.square(LcInvMLrInvT).sum()/2. - tr_LrInvSrLrInvT * tr_LcInvScLcInvT/2. + Mr*Mc/2.
#======================================================================
# Compute dL_dKuu
#======================================================================
tmp = beta* LcInvPsi2_cLcInvT.dot(LcInvMLrInvT).dot(LrInvPsi2_rLrInvT).dot(LcInvMLrInvT.T) \
+ beta* tr_LrInvPsi2_rLrInvT_LrInvSrLrInvT * LcInvPsi2_cLcInvT.dot(LcInvScLcInvT) \
- beta* LcInvMLrInvT.dot(LrInvPsi1_rT).dot(Y.T).dot(LcInvPsi1_cT.T) \
- beta/2. * tr_LrInvPsi2_rLrInvT* LcInvPsi2_cLcInvT - Mr/2.*np.eye(Mc) \
+ tdot(LcInvMLrInvT)/2. + tr_LrInvSrLrInvT/2. * LcInvScLcInvT
dL_dKuu_c = backsub_both_sides(Lc, tmp, 'left')
dL_dKuu_c += dL_dKuu_c.T
dL_dKuu_c *= 0.5
tmp = beta* LcInvMLrInvT.T.dot(LcInvPsi2_cLcInvT).dot(LcInvMLrInvT).dot(LrInvPsi2_rLrInvT) \
+ beta* tr_LcInvPsi2_cLcInvT_LcInvScLcInvT * LrInvPsi2_rLrInvT.dot(LrInvSrLrInvT) \
- beta* LrInvPsi1_rT.dot(Y.T).dot(LcInvPsi1_cT.T).dot(LcInvMLrInvT) \
- beta/2. * tr_LcInvPsi2_cLcInvT * LrInvPsi2_rLrInvT - Mc/2.*np.eye(Mr) \
+ tdot(LcInvMLrInvT.T)/2. + tr_LcInvScLcInvT/2. * LrInvSrLrInvT
dL_dKuu_r = backsub_both_sides(Lr, tmp, 'left')
dL_dKuu_r += dL_dKuu_r.T
dL_dKuu_r *= 0.5
#======================================================================
# Compute dL_dthetaL
#======================================================================
dL_dthetaL = -D*N*beta/2. - logL_A*beta*beta/2.
#======================================================================
# Compute dL_dqU
#======================================================================
tmp = -beta * LcInvPsi2_cLcInvT.dot(LcInvMLrInvT).dot(LrInvPsi2_rLrInvT)\
+ beta* LcInvPsi1_cT.dot(Y).dot(LrInvPsi1_rT.T) - LcInvMLrInvT
dL_dqU_mean = dtrtrs(Lc, dtrtrs(Lr, tmp.T, trans=1)[0].T, trans=1)[0]
LScInv = dtrtri(LSc)
tmp = -beta/2.*tr_LrInvPsi2_rLrInvT_LrInvSrLrInvT * LcInvPsi2_cLcInvT -tr_LrInvSrLrInvT/2.*np.eye(Mc)
dL_dqU_var_c = backsub_both_sides(Lc, tmp, 'left') + tdot(LScInv.T) * Mr/2.
LSrInv = dtrtri(LSr)
tmp = -beta/2.*tr_LcInvPsi2_cLcInvT_LcInvScLcInvT * LrInvPsi2_rLrInvT -tr_LcInvScLcInvT/2.*np.eye(Mr)
dL_dqU_var_r = backsub_both_sides(Lr, tmp, 'left') + tdot(LSrInv.T) * Mc/2.
#======================================================================
# Compute the Posterior distribution of inducing points p(u|Y)
#======================================================================
post = PosteriorMultioutput(LcInvMLrInvT=LcInvMLrInvT, LcInvScLcInvT=LcInvScLcInvT,
LrInvSrLrInvT=LrInvSrLrInvT, Lr=Lr, Lc=Lc, kern_r=kern_r, Xr=Xr, Zr=Zr)
#======================================================================
# Compute dL_dpsi
#======================================================================
dL_dpsi0_r = - psi0_c * beta/2. * np.ones((D,))
dL_dpsi0_c = - psi0_r * beta/2. * np.ones((N,))
dL_dpsi1_c = beta * dtrtrs(Lc, (Y.dot(LrInvPsi1_rT.T).dot(LcInvMLrInvT.T)).T, trans=1)[0].T
dL_dpsi1_r = beta * dtrtrs(Lr, (Y.T.dot(LcInvPsi1_cT.T).dot(LcInvMLrInvT)).T, trans=1)[0].T
tmp = beta/2.*(-LcInvMLrInvT.dot(LrInvPsi2_rLrInvT).dot(LcInvMLrInvT.T) - tr_LrInvPsi2_rLrInvT_LrInvSrLrInvT * LcInvScLcInvT
+tr_LrInvPsi2_rLrInvT *np.eye(Mc))
dL_dpsi2_c = backsub_both_sides(Lc, tmp, 'left')
tmp = beta/2.*(-LcInvMLrInvT.T.dot(LcInvPsi2_cLcInvT).dot(LcInvMLrInvT) - tr_LcInvPsi2_cLcInvT_LcInvScLcInvT * LrInvSrLrInvT
+tr_LcInvPsi2_cLcInvT *np.eye(Mr))
dL_dpsi2_r = backsub_both_sides(Lr, tmp, 'left')
if not uncertain_inputs_r:
dL_dpsi1_r += psi1_r.dot(dL_dpsi2_r+dL_dpsi2_r.T)
if not uncertain_inputs_c:
dL_dpsi1_c += psi1_c.dot(dL_dpsi2_c+dL_dpsi2_c.T)
grad_dict = {
'dL_dthetaL':dL_dthetaL,
'dL_dqU_mean':dL_dqU_mean,
'dL_dqU_var_c':dL_dqU_var_c,
'dL_dqU_var_r':dL_dqU_var_r,
'dL_dKuu_c': dL_dKuu_c,
'dL_dKuu_r': dL_dKuu_r,
}
if uncertain_inputs_c:
grad_dict['dL_dpsi0_c'] = dL_dpsi0_c
grad_dict['dL_dpsi1_c'] = dL_dpsi1_c
grad_dict['dL_dpsi2_c'] = dL_dpsi2_c
else:
grad_dict['dL_dKdiag_c'] = dL_dpsi0_c
grad_dict['dL_dKfu_c'] = dL_dpsi1_c
if uncertain_inputs_r:
grad_dict['dL_dpsi0_r'] = dL_dpsi0_r
grad_dict['dL_dpsi1_r'] = dL_dpsi1_r
grad_dict['dL_dpsi2_r'] = dL_dpsi2_r
else:
grad_dict['dL_dKdiag_r'] = dL_dpsi0_r
grad_dict['dL_dKfu_r'] = dL_dpsi1_r
return post, logL, grad_dict
class PosteriorMultioutput(object):
def __init__(self,LcInvMLrInvT, LcInvScLcInvT, LrInvSrLrInvT, Lr, Lc, kern_r, Xr, Zr):
self.LcInvMLrInvT = LcInvMLrInvT
self.LcInvScLcInvT = LcInvScLcInvT
self.LrInvSrLrInvT = LrInvSrLrInvT
self.Lr = Lr
self.Lc = Lc
self.kern_r = kern_r
self.Xr = Xr
self.Zr = Zr
    def _prepare(self):
        # Precompute prediction-time quantities from the row kernel; only
        # woodbury_vector is stored here, _raw_predict recomputes the rest.
D, Mr, Mc = self.Xr.shape[0], self.Zr.shape[0], self.LcInvMLrInvT.shape[0]
psi2_r_n = self.kern_r.psi2n(self.Zr, self.Xr)
psi0_r = self.kern_r.psi0(self.Zr, self.Xr)
psi1_r = self.kern_r.psi1(self.Zr, self.Xr)
LrInvPsi1_rT = dtrtrs(self.Lr, psi1_r.T)[0]
self.woodbury_vector = self.LcInvMLrInvT.dot(LrInvPsi1_rT)
LrInvPsi2_r_nLrInvT = dtrtrs(self.Lr, np.swapaxes((dtrtrs(self.Lr, psi2_r_n.reshape(D*Mr,Mr).T)[0].T).reshape(D,Mr,Mr),1,2).reshape(D*Mr,Mr).T)[0].T.reshape(D,Mr,Mr)
tr_LrInvPsi2_r_nLrInvT = LrInvPsi2_r_nLrInvT.reshape(D,Mr*Mr).sum(1)
        tr_LrInvPsi2_r_nLrInvT_LrInvSrLrInvT = LrInvPsi2_r_nLrInvT.reshape(D,Mr*Mr).dot(self.LrInvSrLrInvT.flat)
tmp = LrInvPsi2_r_nLrInvT - LrInvPsi1_rT.T[:,:,None]*LrInvPsi1_rT.T[:,None,:]
tmp = np.swapaxes(tmp.reshape(D*Mr,Mr).dot(self.LcInvMLrInvT.T).reshape(D,Mr,Mc), 1,2).reshape(D*Mc,Mr).dot(self.LcInvMLrInvT.T).reshape(D,Mc,Mc)
def _raw_predict(self, kern, Xnew, pred_var, full_cov=False):
N = Xnew.shape[0]
psi1_c = kern.K(Xnew, pred_var)
psi0_c = kern.Kdiag(Xnew)
LcInvPsi1_cT = dtrtrs(self.Lc, psi1_c.T)[0]
D, Mr, Mc = self.Xr.shape[0], self.Zr.shape[0], self.LcInvMLrInvT.shape[0]
psi2_r_n = self.kern_r.psi2n(self.Zr, self.Xr)
psi0_r = self.kern_r.psi0(self.Zr, self.Xr)
psi1_r = self.kern_r.psi1(self.Zr, self.Xr)
LrInvPsi1_rT = dtrtrs(self.Lr, psi1_r.T)[0]
woodbury_vector = self.LcInvMLrInvT.dot(LrInvPsi1_rT)
mu = np.dot(LcInvPsi1_cT.T, woodbury_vector)
LrInvPsi2_r_nLrInvT = dtrtrs(self.Lr, np.swapaxes((dtrtrs(self.Lr, psi2_r_n.reshape(D*Mr,Mr).T)[0].T).reshape(D,Mr,Mr),1,2).reshape(D*Mr,Mr).T)[0].T.reshape(D,Mr,Mr)
tr_LrInvPsi2_r_nLrInvT = np.diagonal(LrInvPsi2_r_nLrInvT,axis1=1,axis2=2).sum(1)
tr_LrInvPsi2_r_nLrInvT_LrInvSrLrInvT = LrInvPsi2_r_nLrInvT.reshape(D,Mr*Mr).dot(self.LrInvSrLrInvT.flat)
tmp = LrInvPsi2_r_nLrInvT - LrInvPsi1_rT.T[:,:,None]*LrInvPsi1_rT.T[:,None,:]
tmp = np.swapaxes(tmp.reshape(D*Mr,Mr).dot(self.LcInvMLrInvT.T).reshape(D,Mr,Mc), 1,2).reshape(D*Mc,Mr).dot(self.LcInvMLrInvT.T).reshape(D,Mc,Mc)
var1 = (tmp.reshape(D*Mc,Mc).dot(LcInvPsi1_cT).reshape(D,Mc,N)*LcInvPsi1_cT[None,:,:]).sum(1).T
var2 = psi0_c[:,None]*psi0_r[None,:]
var3 = tr_LrInvPsi2_r_nLrInvT[None,:]*np.square(LcInvPsi1_cT).sum(0)[:,None]
var4 = tr_LrInvPsi2_r_nLrInvT_LrInvSrLrInvT[None,:]* (self.LcInvScLcInvT.dot(LcInvPsi1_cT)*LcInvPsi1_cT).sum(0)[:,None]
var = var1+var2-var3+var4
return mu, var
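
Throughout this inference, quantities are whitened by the Cholesky factors of the two inducing covariances, e.g. LcInvMLrInvT = Lc^{-1} M Lr^{-T} for the posterior mean M of the inducing outputs. A minimal numerical check of that identity (shapes assumed):

import numpy as np
from GPy.util.linalg import jitchol, dtrtrs

Mc, Mr = 3, 4
Lc = jitchol(np.eye(Mc) + 0.1*np.ones((Mc, Mc)))    # Cholesky of Kuu_c
Lr = jitchol(np.eye(Mr) + 0.1*np.ones((Mr, Mr)))    # Cholesky of Kuu_r
M = np.random.randn(Mc, Mr)                         # qU mean
LcInvMLrInvT = dtrtrs(Lc, dtrtrs(Lr, M.T)[0].T)[0]  # Lc^{-1} M Lr^{-T}
assert np.allclose(Lc.dot(LcInvMLrInvT).dot(Lr.T), M)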

View file

@@ -0,0 +1,306 @@
from GPy.util.linalg import jitchol, backsub_both_sides, tdot, dtrtrs, dtrtri, pdinv, dpotri
from GPy.util import diag
from GPy.core.parameterization.variational import VariationalPosterior
import numpy as np
from GPy.inference.latent_function_inference import LatentFunctionInference
from GPy.inference.latent_function_inference.posterior import Posterior
from .vardtc_svi_multiout import PosteriorMultioutput
log_2_pi = np.log(2*np.pi)
class VarDTC_SVI_Multiout_Miss(LatentFunctionInference):
"""
"""
const_jitter = 1e-6
def get_trYYT(self, Y):
return np.sum(np.square(Y))
def get_YYTfactor(self, Y):
N, D = Y.shape
if (N>=D):
return Y.view(np.ndarray)
else:
return jitchol(tdot(Y))
def gatherPsiStat(self, kern, X, Z, uncertain_inputs):
if uncertain_inputs:
psi0 = kern.psi0(Z, X)
psi1 = kern.psi1(Z, X)
psi2 = kern.psi2n(Z, X)
else:
psi0 = kern.Kdiag(X)
psi1 = kern.K(X, Z)
psi2 = psi1[:,:,None]*psi1[:,None,:]
return psi0, psi1, psi2
def _init_grad_dict(self, N, D, Mr, Mc):
grad_dict = {
'dL_dthetaL': np.zeros(D),
'dL_dqU_mean': np.zeros((Mc,Mr)),
'dL_dqU_var_c':np.zeros((Mc,Mc)),
'dL_dqU_var_r':np.zeros((Mr,Mr)),
'dL_dKuu_c': np.zeros((Mc,Mc)),
'dL_dKuu_r': np.zeros((Mr,Mr)),
'dL_dpsi0_c': np.zeros(N),
'dL_dpsi1_c': np.zeros((N,Mc)),
'dL_dpsi2_c': np.zeros((N,Mc,Mc)),
'dL_dpsi0_r': np.zeros(D),
'dL_dpsi1_r': np.zeros((D,Mr)),
'dL_dpsi2_r': np.zeros((D,Mr,Mr)),
}
return grad_dict
def inference_d(self, d, beta, Y, indexD, grad_dict, mid_res, uncertain_inputs_r, uncertain_inputs_c, Mr, Mc):
idx_d = indexD==d
Y = Y[idx_d]
N, D = Y.shape[0], 1
beta = beta[d]
psi0_r, psi1_r, psi2_r = mid_res['psi0_r'], mid_res['psi1_r'], mid_res['psi2_r']
psi0_c, psi1_c, psi2_c = mid_res['psi0_c'], mid_res['psi1_c'], mid_res['psi2_c']
psi0_r, psi1_r, psi2_r = psi0_r[d], psi1_r[d:d+1], psi2_r[d]
psi0_c, psi1_c, psi2_c = psi0_c[idx_d].sum(), psi1_c[idx_d], psi2_c[idx_d].sum(0)
Lr = mid_res['Lr']
Lc = mid_res['Lc']
LcInvMLrInvT = mid_res['LcInvMLrInvT']
LcInvScLcInvT = mid_res['LcInvScLcInvT']
LrInvSrLrInvT = mid_res['LrInvSrLrInvT']
LcInvPsi2_cLcInvT = backsub_both_sides(Lc, psi2_c,'right')
LrInvPsi2_rLrInvT = backsub_both_sides(Lr, psi2_r,'right')
LcInvPsi1_cT = dtrtrs(Lc, psi1_c.T)[0]
LrInvPsi1_rT = dtrtrs(Lr, psi1_r.T)[0]
tr_LrInvPsi2_rLrInvT_LrInvSrLrInvT = (LrInvPsi2_rLrInvT*LrInvSrLrInvT).sum()
tr_LcInvPsi2_cLcInvT_LcInvScLcInvT = (LcInvPsi2_cLcInvT*LcInvScLcInvT).sum()
tr_LrInvPsi2_rLrInvT = np.trace(LrInvPsi2_rLrInvT)
tr_LcInvPsi2_cLcInvT = np.trace(LcInvPsi2_cLcInvT)
logL_A = - np.square(Y).sum() \
- (LcInvMLrInvT.T.dot(LcInvPsi2_cLcInvT).dot(LcInvMLrInvT)*LrInvPsi2_rLrInvT).sum() \
- tr_LrInvPsi2_rLrInvT_LrInvSrLrInvT* tr_LcInvPsi2_cLcInvT_LcInvScLcInvT \
+ 2 * (Y * LcInvPsi1_cT.T.dot(LcInvMLrInvT).dot(LrInvPsi1_rT)).sum() - psi0_c * psi0_r \
+ tr_LrInvPsi2_rLrInvT * tr_LcInvPsi2_cLcInvT
logL = -N*D/2.*(np.log(2.*np.pi)-np.log(beta)) + beta/2.* logL_A
# ======= Gradients =====
tmp = beta* LcInvPsi2_cLcInvT.dot(LcInvMLrInvT).dot(LrInvPsi2_rLrInvT).dot(LcInvMLrInvT.T) \
+ beta* tr_LrInvPsi2_rLrInvT_LrInvSrLrInvT * LcInvPsi2_cLcInvT.dot(LcInvScLcInvT) \
- beta* LcInvMLrInvT.dot(LrInvPsi1_rT).dot(Y.T).dot(LcInvPsi1_cT.T) \
- beta/2. * tr_LrInvPsi2_rLrInvT* LcInvPsi2_cLcInvT
dL_dKuu_c = backsub_both_sides(Lc, tmp, 'left')
dL_dKuu_c += dL_dKuu_c.T
dL_dKuu_c *= 0.5
tmp = beta* LcInvMLrInvT.T.dot(LcInvPsi2_cLcInvT).dot(LcInvMLrInvT).dot(LrInvPsi2_rLrInvT) \
+ beta* tr_LcInvPsi2_cLcInvT_LcInvScLcInvT * LrInvPsi2_rLrInvT.dot(LrInvSrLrInvT) \
- beta* LrInvPsi1_rT.dot(Y.T).dot(LcInvPsi1_cT.T).dot(LcInvMLrInvT) \
- beta/2. * tr_LcInvPsi2_cLcInvT * LrInvPsi2_rLrInvT
dL_dKuu_r = backsub_both_sides(Lr, tmp, 'left')
dL_dKuu_r += dL_dKuu_r.T
dL_dKuu_r *= 0.5
#======================================================================
# Compute dL_dthetaL
#======================================================================
dL_dthetaL = -D*N*beta/2. - logL_A*beta*beta/2.
#======================================================================
# Compute dL_dqU
#======================================================================
tmp = -beta * LcInvPsi2_cLcInvT.dot(LcInvMLrInvT).dot(LrInvPsi2_rLrInvT)\
+ beta* LcInvPsi1_cT.dot(Y).dot(LrInvPsi1_rT.T)
dL_dqU_mean = dtrtrs(Lc, dtrtrs(Lr, tmp.T, trans=1)[0].T, trans=1)[0]
tmp = -beta/2.*tr_LrInvPsi2_rLrInvT_LrInvSrLrInvT * LcInvPsi2_cLcInvT
dL_dqU_var_c = backsub_both_sides(Lc, tmp, 'left')
tmp = -beta/2.*tr_LcInvPsi2_cLcInvT_LcInvScLcInvT * LrInvPsi2_rLrInvT
dL_dqU_var_r = backsub_both_sides(Lr, tmp, 'left')
#======================================================================
# Compute dL_dpsi
#======================================================================
dL_dpsi0_r = - psi0_c * beta/2. * np.ones((D,))
dL_dpsi0_c = - psi0_r * beta/2. * np.ones((N,))
dL_dpsi1_c = beta * dtrtrs(Lc, (Y.dot(LrInvPsi1_rT.T).dot(LcInvMLrInvT.T)).T, trans=1)[0].T
dL_dpsi1_r = beta * dtrtrs(Lr, (Y.T.dot(LcInvPsi1_cT.T).dot(LcInvMLrInvT)).T, trans=1)[0].T
tmp = beta/2.*(-LcInvMLrInvT.dot(LrInvPsi2_rLrInvT).dot(LcInvMLrInvT.T) - tr_LrInvPsi2_rLrInvT_LrInvSrLrInvT * LcInvScLcInvT
+tr_LrInvPsi2_rLrInvT *np.eye(Mc))
dL_dpsi2_c = backsub_both_sides(Lc, tmp, 'left')
tmp = beta/2.*(-LcInvMLrInvT.T.dot(LcInvPsi2_cLcInvT).dot(LcInvMLrInvT) - tr_LcInvPsi2_cLcInvT_LcInvScLcInvT * LrInvSrLrInvT
+tr_LcInvPsi2_cLcInvT *np.eye(Mr))
dL_dpsi2_r = backsub_both_sides(Lr, tmp, 'left')
grad_dict['dL_dthetaL'][d:d+1] = dL_dthetaL
grad_dict['dL_dqU_mean'] += dL_dqU_mean
grad_dict['dL_dqU_var_c'] += dL_dqU_var_c
grad_dict['dL_dqU_var_r'] += dL_dqU_var_r
grad_dict['dL_dKuu_c'] += dL_dKuu_c
grad_dict['dL_dKuu_r'] += dL_dKuu_r
# if not uncertain_inputs_r:
# dL_dpsi1_r += (dL_dpsi2_r * psi1_r[:,:,None]).sum(1) + (dL_dpsi2_r * psi1_r[:,None,:]).sum(2)
# if not uncertain_inputs_c:
# dL_dpsi1_c += (dL_dpsi2_c * psi1_c[:,:,None]).sum(1) + (dL_dpsi2_c * psi1_c[:,None,:]).sum(2)
if not uncertain_inputs_r:
dL_dpsi1_r += psi1_r.dot(dL_dpsi2_r+dL_dpsi2_r.T)
if not uncertain_inputs_c:
dL_dpsi1_c += psi1_c.dot(dL_dpsi2_c+dL_dpsi2_c.T)
grad_dict['dL_dpsi0_c'][idx_d] += dL_dpsi0_c
grad_dict['dL_dpsi1_c'][idx_d] += dL_dpsi1_c
grad_dict['dL_dpsi2_c'][idx_d] += dL_dpsi2_c
grad_dict['dL_dpsi0_r'][d:d+1] += dL_dpsi0_r
grad_dict['dL_dpsi1_r'][d:d+1] += dL_dpsi1_r
grad_dict['dL_dpsi2_r'][d] += dL_dpsi2_r
return logL
    def inference(self, kern_r, kern_c, Xr, Xc, Zr, Zc, likelihood, Y, qU_mean, qU_var_r, qU_var_c, indexD, output_dim):
"""
The SVI-VarDTC inference
"""
        N, D, Mr, Mc, Qr, Qc = Y.shape[0], output_dim, Zr.shape[0], Zc.shape[0], Zr.shape[1], Zc.shape[1]
uncertain_inputs_r = isinstance(Xr, VariationalPosterior)
uncertain_inputs_c = isinstance(Xc, VariationalPosterior)
uncertain_outputs = isinstance(Y, VariationalPosterior)
grad_dict = self._init_grad_dict(N,D,Mr,Mc)
beta = 1./likelihood.variance
if len(beta)==1:
beta = np.zeros(D)+beta
psi0_r, psi1_r, psi2_r = self.gatherPsiStat(kern_r, Xr, Zr, uncertain_inputs_r)
psi0_c, psi1_c, psi2_c = self.gatherPsiStat(kern_c, Xc, Zc, uncertain_inputs_c)
#======================================================================
# Compute Common Components
#======================================================================
Kuu_r = kern_r.K(Zr).copy()
diag.add(Kuu_r, self.const_jitter)
Lr = jitchol(Kuu_r)
Kuu_c = kern_c.K(Zc).copy()
diag.add(Kuu_c, self.const_jitter)
Lc = jitchol(Kuu_c)
mu, Sr, Sc = qU_mean, qU_var_r, qU_var_c
LSr = jitchol(Sr)
LSc = jitchol(Sc)
LcInvMLrInvT = dtrtrs(Lc,dtrtrs(Lr,mu.T)[0].T)[0]
LcInvLSc = dtrtrs(Lc, LSc)[0]
LrInvLSr = dtrtrs(Lr, LSr)[0]
LcInvScLcInvT = tdot(LcInvLSc)
LrInvSrLrInvT = tdot(LrInvLSr)
tr_LrInvSrLrInvT = np.square(LrInvLSr).sum()
tr_LcInvScLcInvT = np.square(LcInvLSc).sum()
mid_res = {
'psi0_r': psi0_r,
'psi1_r': psi1_r,
'psi2_r': psi2_r,
'psi0_c': psi0_c,
'psi1_c': psi1_c,
'psi2_c': psi2_c,
'Lr':Lr,
'Lc':Lc,
'LcInvMLrInvT': LcInvMLrInvT,
'LcInvScLcInvT': LcInvScLcInvT,
'LrInvSrLrInvT': LrInvSrLrInvT,
}
#======================================================================
# Compute log-likelihood
#======================================================================
logL = 0.
for d in range(D):
logL += self.inference_d(d, beta, Y, indexD, grad_dict, mid_res, uncertain_inputs_r, uncertain_inputs_c, Mr, Mc)
logL += -Mc * (np.log(np.diag(Lr)).sum()-np.log(np.diag(LSr)).sum()) -Mr * (np.log(np.diag(Lc)).sum()-np.log(np.diag(LSc)).sum()) \
- np.square(LcInvMLrInvT).sum()/2. - tr_LrInvSrLrInvT * tr_LcInvScLcInvT/2. + Mr*Mc/2.
#======================================================================
# Compute dL_dKuu
#======================================================================
tmp = tdot(LcInvMLrInvT)/2. + tr_LrInvSrLrInvT/2. * LcInvScLcInvT - Mr/2.*np.eye(Mc)
dL_dKuu_c = backsub_both_sides(Lc, tmp, 'left')
dL_dKuu_c += dL_dKuu_c.T
dL_dKuu_c *= 0.5
tmp = tdot(LcInvMLrInvT.T)/2. + tr_LcInvScLcInvT/2. * LrInvSrLrInvT - Mc/2.*np.eye(Mr)
dL_dKuu_r = backsub_both_sides(Lr, tmp, 'left')
dL_dKuu_r += dL_dKuu_r.T
dL_dKuu_r *= 0.5
#======================================================================
# Compute dL_dqU
#======================================================================
tmp = - LcInvMLrInvT
dL_dqU_mean = dtrtrs(Lc, dtrtrs(Lr, tmp.T, trans=1)[0].T, trans=1)[0]
LScInv = dtrtri(LSc)
tmp = -tr_LrInvSrLrInvT/2.*np.eye(Mc)
dL_dqU_var_c = backsub_both_sides(Lc, tmp, 'left') + tdot(LScInv.T) * Mr/2.
LSrInv = dtrtri(LSr)
tmp = -tr_LcInvScLcInvT/2.*np.eye(Mr)
dL_dqU_var_r = backsub_both_sides(Lr, tmp, 'left') + tdot(LSrInv.T) * Mc/2.
#======================================================================
# Compute the Posterior distribution of inducing points p(u|Y)
#======================================================================
post = PosteriorMultioutput(LcInvMLrInvT=LcInvMLrInvT, LcInvScLcInvT=LcInvScLcInvT,
LrInvSrLrInvT=LrInvSrLrInvT, Lr=Lr, Lc=Lc, kern_r=kern_r, Xr=Xr, Zr=Zr)
#======================================================================
# Compute dL_dpsi
#======================================================================
grad_dict['dL_dqU_mean'] += dL_dqU_mean
grad_dict['dL_dqU_var_c'] += dL_dqU_var_c
grad_dict['dL_dqU_var_r'] += dL_dqU_var_r
grad_dict['dL_dKuu_c'] += dL_dKuu_c
grad_dict['dL_dKuu_r'] += dL_dKuu_r
if not uncertain_inputs_c:
grad_dict['dL_dKdiag_c'] = grad_dict['dL_dpsi0_c']
grad_dict['dL_dKfu_c'] = grad_dict['dL_dpsi1_c']
if not uncertain_inputs_r:
grad_dict['dL_dKdiag_r'] = grad_dict['dL_dpsi0_r']
grad_dict['dL_dKfu_r'] = grad_dict['dL_dpsi1_r']
return post, logL, grad_dict
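
The missing-data inference expects all observations stacked into a single column Y, with indexD giving the output index of each row. A sketch of building that representation from per-output datasets (the list names are hypothetical):

import numpy as np

X_list = [np.random.rand(5, 1), np.random.rand(3, 1)]    # inputs per output
Y_list = [np.random.randn(5, 1), np.random.randn(3, 1)]  # observations per output
X = np.vstack(X_list)
Y = np.vstack(Y_list)
indexD = np.hstack([np.full(x.shape[0], d) for d, x in enumerate(X_list)])
output_dim = len(X_list)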

View file

@@ -27,3 +27,5 @@ from .state_space_model import StateSpace
from .ibp_lfm import IBPLFM
from .gp_offset_regression import GPOffsetRegression
from .gp_grid_regression import GPRegressionGrid
from .gp_multiout_regression import GPMultioutRegression
from .gp_multiout_regression_md import GPMultioutRegressionMD

View file

@@ -0,0 +1,175 @@
# Copyright (c) 2017 Zhenwen Dai
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from ..core import SparseGP
from .. import likelihoods
from .. import kern
from .. import util
from GPy.core.parameterization.variational import NormalPosterior, NormalPrior
from ..core.parameterization.param import Param
from paramz.transformations import Logexp
from ..util.linalg import tdot
class GPMultioutRegression(SparseGP):
"""
Gaussian Process model for scalable multioutput regression
This is a thin wrapper around the models.GP class, with a set of sensible defaults
:param X_list: list of input observations corresponding to each output
:type X_list: list of numpy arrays
:param Y_list: list of observed values related to the different noise models
:type Y_list: list of numpy arrays
:param kernel: a GPy kernel ** Coregionalized, defaults to RBF ** Coregionalized
:type kernel: None | GPy.kernel defaults
:likelihoods_list: a list of likelihoods, defaults to list of Gaussian likelihoods
:type likelihoods_list: None | a list GPy.likelihoods
:param name: model name
:type name: string
:param W_rank: number tuples of the corregionalization parameters 'W' (see coregionalize kernel documentation)
:type W_rank: integer
:param kernel_name: name of the kernel
:type kernel_name: string
"""
def __init__(self, X, Y, Xr_dim, kernel=None, kernel_row=None, likelihood=None, Z=None, Z_row=None, X_row=None, Xvariance_row=None, num_inducing=(10,10), qU_var_r_W_dim=None, qU_var_c_W_dim=None, init='GP', name='GPMR'):
#Kernel
if kernel is None:
kernel = kern.RBF(X.shape[1])
if kernel_row is None:
kernel_row = kern.RBF(Xr_dim,name='kern_row')
if init=='GP':
from . import SparseGPRegression, BayesianGPLVM
from ..util.linalg import jitchol
Mc, Mr = num_inducing
            print('Initializing with GP...')
print('Fit Sparse GP...')
m_sgp = SparseGPRegression(X,Y,kernel=kernel.copy(),num_inducing=Mc)
m_sgp.likelihood.variance[:] = Y.var()*0.01
m_sgp.optimize(max_iters=1000)
print('Fit BGPLVM...')
m_lvm = BayesianGPLVM(m_sgp.posterior.mean.copy().T,Xr_dim,kernel=kernel_row.copy(), num_inducing=Mr)
m_lvm.likelihood.variance[:] = m_lvm.Y.var()*0.01
m_lvm.optimize(max_iters=10000)
kernel[:] = m_sgp.kern.param_array.copy()
kernel.variance[:] = np.sqrt(kernel.variance)
Z = m_sgp.Z.values.copy()
kernel_row[:] = m_lvm.kern.param_array.copy()
kernel_row.variance[:] = np.sqrt(kernel_row.variance)
Z_row = m_lvm.Z.values.copy()
X_row = m_lvm.X.mean.values.copy()
Xvariance_row = m_lvm.X.variance.values
qU_mean = m_lvm.posterior.mean.T.copy()
qU_var_col_W = jitchol(m_sgp.posterior.covariance)
qU_var_col_diag = np.full(Mc,1e-5)
qU_var_row_W = jitchol(m_lvm.posterior.covariance)
qU_var_row_diag = np.full(Mr,1e-5)
print('Done.')
else:
qU_mean = np.zeros(num_inducing)
qU_var_col_W = np.random.randn(num_inducing[0],num_inducing[0] if qU_var_c_W_dim is None else qU_var_c_W_dim)*0.01
qU_var_col_diag = np.full(num_inducing[0],1e-5)
qU_var_row_W = np.random.randn(num_inducing[1],num_inducing[1] if qU_var_r_W_dim is None else qU_var_r_W_dim)*0.01
qU_var_row_diag = np.full(num_inducing[1],1e-5)
if X_row is None:
u,s,v = np.linalg.svd(Y)
X_row = Y.T.dot(u[:,:Xr_dim])#*np.sqrt(s)[:Xr_dim])
X_row = X_row/X_row.std(0)
if Xvariance_row is None:
Xvariance_row = np.ones((Y.shape[1],Xr_dim))*0.0001
if Z is None:
Z = X[np.random.permutation(X.shape[0])[:num_inducing[0]]].copy()
if Z_row is None:
Z_row = X_row[np.random.permutation(X_row.shape[0])[:num_inducing[1]]].copy()
self.kern_row = kernel_row
self.X_row = NormalPosterior(X_row, Xvariance_row,name='Xr')
self.Z_row = Param('Zr', Z_row)
self.variational_prior_row = NormalPrior()
self.qU_mean = Param('qU_mean', qU_mean)
self.qU_var_c_W = Param('qU_var_col_W', qU_var_col_W)
self.qU_var_c_diag = Param('qU_var_col_diag', qU_var_col_diag, Logexp())
self.qU_var_r_W = Param('qU_var_row_W',qU_var_row_W)
self.qU_var_r_diag = Param('qU_var_row_diag', qU_var_row_diag, Logexp())
        #Likelihood (honor a user-supplied likelihood instead of overwriting it)
        if likelihood is None:
            likelihood = likelihoods.Gaussian(variance=np.var(Y)*0.01)
from ..inference.latent_function_inference import VarDTC_SVI_Multiout
inference_method = VarDTC_SVI_Multiout()
super(GPMultioutRegression,self).__init__(X, Y, Z, kernel, likelihood=likelihood,
name=name, inference_method=inference_method)
self.link_parameters(self.kern_row, self.X_row, self.Z_row,self.qU_mean, self.qU_var_c_W, self.qU_var_c_diag, self.qU_var_r_W, self.qU_var_r_diag)
self._log_marginal_likelihood = np.nan
def parameters_changed(self):
qU_var_c = tdot(self.qU_var_c_W) + np.diag(self.qU_var_c_diag)
qU_var_r = tdot(self.qU_var_r_W) + np.diag(self.qU_var_r_diag)
        self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern_row, self.kern, self.X_row, self.X, self.Z_row, self.Z, self.likelihood, self.Y, self.qU_mean, qU_var_r, qU_var_c)
self.likelihood.update_gradients(self.grad_dict['dL_dthetaL'])
self.qU_mean.gradient[:] = self.grad_dict['dL_dqU_mean']
self.qU_var_c_diag.gradient[:] = np.diag(self.grad_dict['dL_dqU_var_c'])
self.qU_var_c_W.gradient[:] = (self.grad_dict['dL_dqU_var_c']+self.grad_dict['dL_dqU_var_c'].T).dot(self.qU_var_c_W)
self.qU_var_r_diag.gradient[:] = np.diag(self.grad_dict['dL_dqU_var_r'])
self.qU_var_r_W.gradient[:] = (self.grad_dict['dL_dqU_var_r']+self.grad_dict['dL_dqU_var_r'].T).dot(self.qU_var_r_W)
self.kern.update_gradients_diag(self.grad_dict['dL_dKdiag_c'], self.X)
kerngrad = self.kern.gradient.copy()
self.kern.update_gradients_full(self.grad_dict['dL_dKfu_c'], self.X, self.Z)
kerngrad += self.kern.gradient
self.kern.update_gradients_full(self.grad_dict['dL_dKuu_c'], self.Z, None)
self.kern.gradient += kerngrad
#gradients wrt Z
self.Z.gradient = self.kern.gradients_X(self.grad_dict['dL_dKuu_c'], self.Z)
self.Z.gradient += self.kern.gradients_X(self.grad_dict['dL_dKfu_c'].T, self.Z, self.X)
#gradients wrt kernel
self.kern_row.update_gradients_full(self.grad_dict['dL_dKuu_r'], self.Z_row, None)
kerngrad = self.kern_row.gradient.copy()
self.kern_row.update_gradients_expectations(variational_posterior=self.X_row,
Z=self.Z_row,
dL_dpsi0=self.grad_dict['dL_dpsi0_r'],
dL_dpsi1=self.grad_dict['dL_dpsi1_r'],
dL_dpsi2=self.grad_dict['dL_dpsi2_r'])
self.kern_row.gradient += kerngrad
#gradients wrt Z
self.Z_row.gradient = self.kern_row.gradients_X(self.grad_dict['dL_dKuu_r'], self.Z_row)
self.Z_row.gradient += self.kern_row.gradients_Z_expectations(
self.grad_dict['dL_dpsi0_r'],
self.grad_dict['dL_dpsi1_r'],
self.grad_dict['dL_dpsi2_r'],
Z=self.Z_row,
variational_posterior=self.X_row)
self._log_marginal_likelihood -= self.variational_prior_row.KL_divergence(self.X_row)
self.X_row.mean.gradient, self.X_row.variance.gradient = self.kern_row.gradients_qX_expectations(
variational_posterior=self.X_row,
Z=self.Z_row,
dL_dpsi0=self.grad_dict['dL_dpsi0_r'],
dL_dpsi1=self.grad_dict['dL_dpsi1_r'],
dL_dpsi2=self.grad_dict['dL_dpsi2_r'])
self.variational_prior_row.update_gradients_KL(self.X_row)
    def optimize_auto(self, max_iters=10000, verbose=True):
        """
        Optimize the model in two stages: first with the kernels, the inducing
        inputs and the latent positions fixed, then with all parameters free.
        """
self.Z.fix(warning=False)
self.kern.fix(warning=False)
self.kern_row.fix(warning=False)
self.Zr.fix(warning=False)
self.Xr.fix(warning=False)
self.optimize(max_iters=int(0.1*max_iters),messages=verbose)
self.unfix()
self.optimize(max_iters=max_iters,messages=verbose)
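
The variational covariances of the inducing outputs are parameterized as low-rank plus diagonal, qU_var = W W^T + diag(d), which is positive definite for any W and positive d (hence the Logexp transform on the diagonal). A quick check of the construction used in parameters_changed:

import numpy as np
from GPy.util.linalg import tdot

M, R = 5, 2
W = np.random.randn(M, R) * 0.01
d = np.full(M, 1e-5)
qU_var = tdot(W) + np.diag(d)   # tdot(W) == W.dot(W.T)
assert np.all(np.linalg.eigvalsh(qU_var) > 0)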

View file

@@ -0,0 +1,173 @@
# Copyright (c) 2017 Zhenwen Dai
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from ..core import SparseGP
from .. import likelihoods
from .. import kern
from .. import util
from GPy.core.parameterization.variational import NormalPosterior, NormalPrior
from ..core.parameterization.param import Param
from paramz.transformations import Logexp
from ..util.linalg import tdot
from .sparse_gp_regression_md import SparseGPRegressionMD
class GPMultioutRegressionMD(SparseGP):
"""
Gaussian Process model for scalable multioutput regression
This is a thin wrapper around the models.GP class, with a set of sensible defaults
"""
def __init__(self, X, Y, indexD, Xr_dim, kernel=None, kernel_row=None, likelihood=None, Z=None, Z_row=None, X_row=None, Xvariance_row=None, num_inducing=(10,10), qU_var_r_W_dim=None, qU_var_c_W_dim=None, init='GP', heter_noise=False, name='GPMR'):
assert len(Y.shape)==1 or Y.shape[1]==1
self.output_dim = int(np.max(indexD))+1
self.heter_noise = heter_noise
self.indexD = indexD
#Kernel
if kernel is None:
kernel = kern.RBF(X.shape[1])
if kernel_row is None:
kernel_row = kern.RBF(Xr_dim,name='kern_row')
if init=='GP':
from . import SparseGPRegression, BayesianGPLVM
from ..util.linalg import jitchol
Mc, Mr = num_inducing
            print('Initializing with GP...')
print('Fit Sparse GP...')
m_sgp = SparseGPRegressionMD(X,Y,indexD,kernel=kernel.copy(),num_inducing=Mc)
m_sgp.likelihood.variance[:] = Y.var()*0.01
m_sgp.optimize(max_iters=1000)
print('Fit BGPLVM...')
m_lvm = BayesianGPLVM(m_sgp.posterior.mean.copy().T,Xr_dim,kernel=kernel_row.copy(), num_inducing=Mr)
m_lvm.likelihood.variance[:] = m_lvm.Y.var()*0.01
m_lvm.optimize(max_iters=10000)
kernel[:] = m_sgp.kern.param_array.copy()
kernel.variance[:] = np.sqrt(kernel.variance)
Z = m_sgp.Z.values.copy()
kernel_row[:] = m_lvm.kern.param_array.copy()
kernel_row.variance[:] = np.sqrt(kernel_row.variance)
Z_row = m_lvm.Z.values.copy()
X_row = m_lvm.X.mean.values.copy()
Xvariance_row = m_lvm.X.variance.values
qU_mean = m_lvm.posterior.mean.T.copy()
qU_var_col_W = jitchol(m_sgp.posterior.covariance)
qU_var_col_diag = np.full(Mc,1e-5)
qU_var_row_W = jitchol(m_lvm.posterior.covariance)
qU_var_row_diag = np.full(Mr,1e-5)
print('Done.')
else:
qU_mean = np.zeros(num_inducing)
qU_var_col_W = np.random.randn(num_inducing[0],num_inducing[0] if qU_var_c_W_dim is None else qU_var_c_W_dim)*0.01
qU_var_col_diag = np.full(num_inducing[0],1e-5)
qU_var_row_W = np.random.randn(num_inducing[1],num_inducing[1] if qU_var_r_W_dim is None else qU_var_r_W_dim)*0.01
qU_var_row_diag = np.full(num_inducing[1],1e-5)
if Z is None:
Z = X[np.random.permutation(X.shape[0])[:num_inducing[0]]].copy()
if X_row is None:
X_row = np.random.randn(self.output_dim,Xr_dim)
if Xvariance_row is None:
Xvariance_row = np.ones((self.output_dim,Xr_dim))*0.0001
if Z_row is None:
Z_row = X_row[np.random.permutation(X_row.shape[0])[:num_inducing[1]]].copy()
self.kern_row = kernel_row
self.X_row = NormalPosterior(X_row, Xvariance_row,name='Xr')
self.Z_row = Param('Zr', Z_row)
self.variational_prior_row = NormalPrior()
self.qU_mean = Param('qU_mean', qU_mean)
self.qU_var_c_W = Param('qU_var_col_W', qU_var_col_W)
self.qU_var_c_diag = Param('qU_var_col_diag', qU_var_col_diag, Logexp())
self.qU_var_r_W = Param('qU_var_row_W',qU_var_row_W)
self.qU_var_r_diag = Param('qU_var_row_diag', qU_var_row_diag, Logexp())
        #Likelihood (honor a user-supplied likelihood instead of overwriting it)
        if likelihood is None:
            if heter_noise:
                likelihood = likelihoods.Gaussian(variance=np.array([np.var(Y[indexD==d]) for d in range(self.output_dim)])*0.01)
            else:
                likelihood = likelihoods.Gaussian(variance=np.var(Y)*0.01)
from ..inference.latent_function_inference.vardtc_svi_multiout_miss import VarDTC_SVI_Multiout_Miss
inference_method = VarDTC_SVI_Multiout_Miss()
super(GPMultioutRegressionMD,self).__init__(X, Y, Z, kernel, likelihood=likelihood,
name=name, inference_method=inference_method)
self.output_dim = int(np.max(indexD))+1
self.link_parameters(self.kern_row, self.X_row, self.Z_row,self.qU_mean, self.qU_var_c_W, self.qU_var_c_diag, self.qU_var_r_W, self.qU_var_r_diag)
self._log_marginal_likelihood = np.nan
def parameters_changed(self):
qU_var_c = tdot(self.qU_var_c_W) + np.diag(self.qU_var_c_diag)
qU_var_r = tdot(self.qU_var_r_W) + np.diag(self.qU_var_r_diag)
        self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern_row, self.kern, self.X_row, self.X, self.Z_row, self.Z, self.likelihood, self.Y, self.qU_mean, qU_var_r, qU_var_c, self.indexD, self.output_dim)
if self.heter_noise:
self.likelihood.update_gradients(self.grad_dict['dL_dthetaL'])
else:
self.likelihood.update_gradients(self.grad_dict['dL_dthetaL'].sum())
self.qU_mean.gradient[:] = self.grad_dict['dL_dqU_mean']
self.qU_var_c_diag.gradient[:] = np.diag(self.grad_dict['dL_dqU_var_c'])
self.qU_var_c_W.gradient[:] = (self.grad_dict['dL_dqU_var_c']+self.grad_dict['dL_dqU_var_c'].T).dot(self.qU_var_c_W)
self.qU_var_r_diag.gradient[:] = np.diag(self.grad_dict['dL_dqU_var_r'])
self.qU_var_r_W.gradient[:] = (self.grad_dict['dL_dqU_var_r']+self.grad_dict['dL_dqU_var_r'].T).dot(self.qU_var_r_W)
self.kern.update_gradients_diag(self.grad_dict['dL_dKdiag_c'], self.X)
kerngrad = self.kern.gradient.copy()
self.kern.update_gradients_full(self.grad_dict['dL_dKfu_c'], self.X, self.Z)
kerngrad += self.kern.gradient
self.kern.update_gradients_full(self.grad_dict['dL_dKuu_c'], self.Z, None)
self.kern.gradient += kerngrad
#gradients wrt Z
self.Z.gradient = self.kern.gradients_X(self.grad_dict['dL_dKuu_c'], self.Z)
self.Z.gradient += self.kern.gradients_X(self.grad_dict['dL_dKfu_c'].T, self.Z, self.X)
#gradients wrt kernel
self.kern_row.update_gradients_full(self.grad_dict['dL_dKuu_r'], self.Z_row, None)
kerngrad = self.kern_row.gradient.copy()
self.kern_row.update_gradients_expectations(variational_posterior=self.X_row,
Z=self.Z_row,
dL_dpsi0=self.grad_dict['dL_dpsi0_r'],
dL_dpsi1=self.grad_dict['dL_dpsi1_r'],
dL_dpsi2=self.grad_dict['dL_dpsi2_r'])
self.kern_row.gradient += kerngrad
#gradients wrt Z
self.Z_row.gradient = self.kern_row.gradients_X(self.grad_dict['dL_dKuu_r'], self.Z_row)
self.Z_row.gradient += self.kern_row.gradients_Z_expectations(
self.grad_dict['dL_dpsi0_r'],
self.grad_dict['dL_dpsi1_r'],
self.grad_dict['dL_dpsi2_r'],
Z=self.Z_row,
variational_posterior=self.X_row)
self._log_marginal_likelihood -= self.variational_prior_row.KL_divergence(self.X_row)
self.X_row.mean.gradient, self.X_row.variance.gradient = self.kern_row.gradients_qX_expectations(
variational_posterior=self.X_row,
Z=self.Z_row,
dL_dpsi0=self.grad_dict['dL_dpsi0_r'],
dL_dpsi1=self.grad_dict['dL_dpsi1_r'],
dL_dpsi2=self.grad_dict['dL_dpsi2_r'])
self.variational_prior_row.update_gradients_KL(self.X_row)
    def optimize_auto(self, max_iters=10000, verbose=True):
        """
        Optimize the model in two stages: first with the kernels, the inducing
        inputs and the latent positions fixed, then with all parameters free.
        """
self.Z.fix(warning=False)
self.kern.fix(warning=False)
self.kern_row.fix(warning=False)
self.Zr.fix(warning=False)
self.Xr.fix(warning=False)
self.optimize(max_iters=int(0.1*max_iters),messages=verbose)
self.unfix()
self.optimize(max_iters=max_iters,messages=verbose)
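
A minimal usage sketch of the missing-data model, following the unit test added in this PR (data and sizes are illustrative):

import numpy as np
import GPy

N, D, Qr = 30, 3, 2
X = np.random.rand(N, 1)
Y = np.random.randn(N, 1)                  # one scalar observation per row
indexD = np.repeat(np.arange(D), N // D)   # output index of each row
m = GPy.models.GPMultioutRegressionMD(X, Y, indexD, Xr_dim=Qr,
                                      kernel_row=GPy.kern.RBF(Qr),
                                      num_inducing=(4, 3), init='rand')
m.optimize_auto(max_iters=100)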

View file

@@ -0,0 +1,64 @@
# Copyright (c) 2012, James Hensman
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from ..core.sparse_gp_mpi import SparseGP_MPI
from .. import likelihoods
from .. import kern
from ..inference.latent_function_inference.vardtc_md import VarDTC_MD
from GPy.core.parameterization.variational import NormalPosterior
class SparseGPRegressionMD(SparseGP_MPI):
"""
Sparse Gaussian Process Regression with Missing Data
"""
def __init__(self, X, Y, indexD, kernel=None, Z=None, num_inducing=10, X_variance=None, normalizer=None, mpi_comm=None, individual_Y_noise=False, name='sparse_gp'):
assert len(Y.shape)==1 or Y.shape[1]==1
self.individual_Y_noise = individual_Y_noise
self.indexD = indexD
output_dim = int(np.max(indexD))+1
num_data, input_dim = X.shape
# kern defaults to rbf (plus white for stability)
if kernel is None:
kernel = kern.RBF(input_dim)# + kern.white(input_dim, variance=1e-3)
# Z defaults to a subset of the data
if Z is None:
i = np.random.permutation(num_data)[:min(num_inducing, num_data)]
Z = X.view(np.ndarray)[i].copy()
else:
assert Z.shape[1] == input_dim
if individual_Y_noise:
likelihood = likelihoods.Gaussian(variance=np.array([np.var(Y[indexD==d]) for d in range(output_dim)])*0.01)
else:
likelihood = likelihoods.Gaussian(variance=np.var(Y)*0.01)
if not (X_variance is None):
X = NormalPosterior(X,X_variance)
infr = VarDTC_MD()
SparseGP_MPI.__init__(self, X, Y, Z, kernel, likelihood, inference_method=infr, normalizer=normalizer, mpi_comm=mpi_comm, name=name)
self.output_dim = output_dim
def parameters_changed(self):
self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.Z, self.likelihood, self.Y, self.indexD, self.output_dim, self.Y_metadata)
self.likelihood.update_gradients(self.grad_dict['dL_dthetaL'] if self.individual_Y_noise else self.grad_dict['dL_dthetaL'].sum())
self.kern.update_gradients_diag(self.grad_dict['dL_dKdiag'], self.X)
kerngrad = self.kern.gradient.copy()
self.kern.update_gradients_full(self.grad_dict['dL_dKnm'], self.X, self.Z)
kerngrad += self.kern.gradient
self.kern.update_gradients_full(self.grad_dict['dL_dKmm'], self.Z, None)
self.kern.gradient += kerngrad
#gradients wrt Z
self.Z.gradient = self.kern.gradients_X(self.grad_dict['dL_dKmm'], self.Z)
self.Z.gradient += self.kern.gradients_X(self.grad_dict['dL_dKnm'].T, self.Z, self.X)
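
SparseGPRegressionMD is also usable on its own (it is the model fitted by the 'GP' initialization of GPMultioutRegressionMD above). A usage sketch, assuming the module path added in this PR; data is illustrative:

import numpy as np
from GPy.models.sparse_gp_regression_md import SparseGPRegressionMD

N, D = 40, 4
X = np.random.rand(N, 1)
Y = np.random.randn(N, 1)
indexD = np.repeat(np.arange(D), N // D)
m = SparseGPRegressionMD(X, Y, indexD, num_inducing=5)
m.optimize(max_iters=100)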

View file

@@ -961,6 +961,127 @@ class GradientTests(np.testing.TestCase):
        m.randomize()
        self.assertTrue(m.checkgrad())
def test_multiout_regression(self):
np.random.seed(0)
import GPy
N = 10
N_train = 5
D = 4
noise_var = .3
k = GPy.kern.RBF(1,lengthscale=0.1)
x = np.random.rand(N,1)
cov = k.K(x)
k_r = GPy.kern.RBF(2,lengthscale=.4)
x_r = np.random.rand(D,2)
cov_r = k_r.K(x_r)
cov_all = np.kron(cov_r,cov)
L = GPy.util.linalg.jitchol(cov_all)
y_latent = L.dot(np.random.randn(N*D)).reshape(D,N).T
x_test = x[N_train:]
y_test = y_latent[N_train:]
x = x[:N_train]
y = y_latent[:N_train]+np.random.randn(N_train,D)*np.sqrt(noise_var)
Mr = D
Mc = x.shape[0]
Qr = 5
Qc = x.shape[1]
m_mr = GPy.models.GPMultioutRegression(x,y,Xr_dim=Qr, kernel_row=GPy.kern.RBF(Qr,ARD=True), num_inducing=(Mc,Mr),init='GP')
m_mr.optimize_auto(max_iters=1)
m_mr.randomize()
self.assertTrue(m_mr.checkgrad())
m_mr = GPy.models.GPMultioutRegression(x,y,Xr_dim=Qr, kernel_row=GPy.kern.RBF(Qr,ARD=True), num_inducing=(Mc,Mr),init='rand')
m_mr.optimize_auto(max_iters=1)
m_mr.randomize()
self.assertTrue(m_mr.checkgrad())
def test_multiout_regression_md(self):
import GPy
np.random.seed(0)
N = 20
N_train = 5
D = 8
noise_var = 0.3
k = GPy.kern.RBF(1,lengthscale=0.1)
x_raw = np.random.rand(N*D,1)
# dimension assignment
D_list = []
for i in range(2):
while True:
D_sub_list = []
ratios = []
r_p = 0.
for j in range(3):
ratios.append(np.random.rand()*(1-r_p)+r_p)
D_sub_list.append(int((ratios[-1]-r_p)*4*N_train))
r_p = ratios[-1]
D_sub_list.append(4*N_train - np.sum(D_sub_list))
if (np.array(D_sub_list)!=0).all():
D_list.extend([a+N-N_train for a in D_sub_list])
break
cov = k.K(x_raw)
k_r = GPy.kern.RBF(2,lengthscale=.4)
x_r = np.random.rand(D,2)
cov_r = k_r.K(x_r)
cov_all = np.repeat(np.repeat(cov_r,D_list,axis=0),D_list,axis=1)*cov
L = GPy.util.linalg.jitchol(cov_all)
y_latent = L.dot(np.random.randn(N*D))
x = np.zeros((D*N_train,))
y = np.zeros((D*N_train,))
x_test = np.zeros((D*(N-N_train),))
y_test = np.zeros((D*(N-N_train),))
        indexD = np.zeros((D*N_train),dtype=int)
        indexD_test = np.zeros((D*(N-N_train)),dtype=int)
offset_all = 0
offset_train = 0
offset_test = 0
for i in range(D):
D_test = N-N_train
D_train = D_list[i] - N+N_train
y[offset_train:offset_train+D_train] = y_latent[offset_all:offset_all+D_train]
x[offset_train:offset_train+D_train] = x_raw[offset_all:offset_all+D_train,0]
y_test[offset_test:offset_test+D_test] = y_latent[offset_all+D_train:offset_all+D_train+D_test]
x_test[offset_test:offset_test+D_test] = x_raw[offset_all+D_train:offset_all+D_train+D_test,0]
indexD[offset_train:offset_train+D_train] = i
indexD_test[offset_test:offset_test+D_test] = i
offset_train += D_train
offset_test += D_test
offset_all += D_train+D_test
y_noisefree = y.copy()
y += np.random.randn(*y.shape)*np.sqrt(noise_var)
x_flat = x.flatten()[:,None]
y_flat = y.flatten()[:,None]
Mr, Mc, Qr, Qc = 4,3,2,1
m = GPy.models.GPMultioutRegressionMD(x_flat,y_flat,indexD,Xr_dim=Qr, kernel_row=GPy.kern.RBF(Qr,ARD=False), num_inducing=(Mc,Mr))
m.optimize_auto(max_iters=1)
m.randomize()
self.assertTrue(m.checkgrad())
m = GPy.models.GPMultioutRegressionMD(x_flat,y_flat,indexD,Xr_dim=Qr, kernel_row=GPy.kern.RBF(Qr,ARD=False), num_inducing=(Mc,Mr),init='rand')
m.optimize_auto(max_iters=1)
m.randomize()
self.assertTrue(m.checkgrad())
if __name__ == "__main__": if __name__ == "__main__":
print("Running unit tests, please be (very) patient...") print("Running unit tests, please be (very) patient...")
unittest.main() unittest.main()