Mirror of https://github.com/SheffieldML/GPy.git (synced 2026-05-03 08:42:39 +02:00)

Merge pull request #562 from SheffieldML/external-mo
Release the implementation of LVMOGP
Commit 8af7dedf4b
9 changed files with 1299 additions and 8 deletions
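This PR adds two user-facing models, GPy.models.GPMultioutRegression and GPy.models.GPMultioutRegressionMD (the missing-data variant, where each observation carries an output index), together with the inference objects VarDTC_SVI_Multiout, VarDTC_SVI_Multiout_Miss and VarDTC_MD that back them. A minimal usage sketch follows (synthetic data; argument names and shapes are taken from the new tests at the end of this diff):

# Minimal usage sketch of the new model, following test_multiout_regression below.
# Synthetic data; not part of the diff itself.
import numpy as np
import GPy

N, D, Qr = 10, 4, 5                      # data points, outputs, latent dim of the output space
x = np.random.rand(N, 1)                 # inputs shared across all outputs
y = np.random.randn(N, D)                # one column per output
m = GPy.models.GPMultioutRegression(
    x, y, Xr_dim=Qr,
    kernel_row=GPy.kern.RBF(Qr, ARD=True),
    num_inducing=(N, D), init='GP')      # (Mc, Mr) inducing points
m.optimize_auto(max_iters=100)           # two-stage optimization defined by the new models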
GPy/inference/latent_function_inference/__init__.py
@@ -91,6 +91,8 @@ from .pep import PEP
 from .var_dtc_parallel import VarDTC_minibatch
 from .var_gauss import VarGauss
 from .gaussian_grid_inference import GaussianGridInference
+from .vardtc_svi_multiout import VarDTC_SVI_Multiout
+from .vardtc_svi_multiout_miss import VarDTC_SVI_Multiout_Miss


 # class FullLatentFunctionData(object):
GPy/inference/latent_function_inference/vardtc_md.py (new file, 177 lines)
@@ -0,0 +1,177 @@
from GPy.util.linalg import jitchol, backsub_both_sides, tdot, dtrtrs, dtrtri, pdinv, dpotri
from GPy.util import diag
from GPy.core.parameterization.variational import VariationalPosterior
import numpy as np
from GPy.inference.latent_function_inference import LatentFunctionInference
from GPy.inference.latent_function_inference.posterior import Posterior

log_2_pi = np.log(2*np.pi)


class VarDTC_MD(LatentFunctionInference):
    """
    An object for inference when the likelihood is Gaussian, but we want to do sparse inference.

    The function self.inference returns a Posterior object, which summarizes the posterior.

    For efficiency, we sometimes work with the Cholesky of Y*Y.T. To save repeatedly recomputing this, we cache it.
    """
    const_jitter = 1e-6

    def gatherPsiStat(self, kern, X, Z, Y, beta, uncertain_inputs):
        if uncertain_inputs:
            psi0 = kern.psi0(Z, X)
            psi1 = kern.psi1(Z, X)
            psi2 = kern.psi2n(Z, X)
        else:
            psi0 = kern.Kdiag(X)
            psi1 = kern.K(X, Z)
            psi2 = psi1[:,:,None]*psi1[:,None,:]

        return psi0, psi1, psi2

    def inference(self, kern, X, Z, likelihood, Y, indexD, output_dim, Y_metadata=None, Lm=None, dL_dKmm=None, Kuu_sigma=None):
        """
        The first phase of inference:
        Compute: log-likelihood, dL_dKmm

        Cached intermediate results: Kmm, KmmInv,
        """
        input_dim = Z.shape[0]

        uncertain_inputs = isinstance(X, VariationalPosterior)

        beta = 1./likelihood.variance
        if len(beta)==1:
            beta = np.zeros(output_dim)+beta

        beta_exp = np.zeros(indexD.shape[0])
        for d in range(output_dim):
            beta_exp[indexD==d] = beta[d]

        psi0, psi1, psi2 = self.gatherPsiStat(kern, X, Z, Y, beta, uncertain_inputs)

        psi2_sum = (beta_exp[:,None,None]*psi2).sum(0)/output_dim

        #======================================================================
        # Compute Common Components
        #======================================================================

        Kmm = kern.K(Z).copy()
        if Kuu_sigma is not None:
            diag.add(Kmm, Kuu_sigma)
        else:
            diag.add(Kmm, self.const_jitter)
        Lm = jitchol(Kmm)

        logL = 0.
        dL_dthetaL = np.zeros(output_dim)
        dL_dKmm = np.zeros_like(Kmm)
        dL_dpsi0 = np.zeros_like(psi0)
        dL_dpsi1 = np.zeros_like(psi1)
        dL_dpsi2 = np.zeros_like(psi2)
        wv = np.empty((Kmm.shape[0], output_dim))

        for d in range(output_dim):
            idx_d = indexD==d
            Y_d = Y[idx_d]
            N_d = Y_d.shape[0]
            beta_d = beta[d]

            psi2_d = psi2[idx_d].sum(0)*beta_d
            psi1Y = Y_d.T.dot(psi1[idx_d])*beta_d
            psi0_d = psi0[idx_d].sum()*beta_d
            YRY_d = np.square(Y_d).sum()*beta_d

            LmInvPsi2LmInvT = backsub_both_sides(Lm, psi2_d, 'right')

            Lambda = np.eye(Kmm.shape[0])+LmInvPsi2LmInvT
            LL = jitchol(Lambda)
            LmLL = Lm.dot(LL)

            b = dtrtrs(LmLL, psi1Y.T)[0].T
            bbt = np.square(b).sum()
            v = dtrtrs(LmLL, b.T, trans=1)[0].T
            LLinvPsi1TYYTPsi1LLinvT = tdot(b.T)

            tmp = -backsub_both_sides(LL, LLinvPsi1TYYTPsi1LLinvT)
            dL_dpsi2R = backsub_both_sides(Lm, tmp+np.eye(input_dim))/2

            logL_R = -N_d*np.log(beta_d)
            logL += -((N_d*log_2_pi+logL_R+psi0_d-np.trace(LmInvPsi2LmInvT))+YRY_d-bbt)/2.

            dL_dKmm += dL_dpsi2R - backsub_both_sides(Lm, LmInvPsi2LmInvT)/2

            dL_dthetaL[d:d+1] = (YRY_d*beta_d + beta_d*psi0_d - N_d*beta_d)/2. - beta_d*(dL_dpsi2R*psi2_d).sum() - beta_d*np.trace(LLinvPsi1TYYTPsi1LLinvT)

            dL_dpsi0[idx_d] = -beta_d/2.
            dL_dpsi1[idx_d] = beta_d*np.dot(Y_d, v)
            dL_dpsi2[idx_d] = beta_d*dL_dpsi2R
            wv[:,d] = v

        LmInvPsi2LmInvT = backsub_both_sides(Lm, psi2_sum, 'right')

        Lambda = np.eye(Kmm.shape[0])+LmInvPsi2LmInvT
        LL = jitchol(Lambda)
        LmLL = Lm.dot(LL)
        logdet_L = 2.*np.sum(np.log(np.diag(LL)))
        dL_dpsi2R_common = dpotri(LmLL)[0]/-2.
        dL_dpsi2 += dL_dpsi2R_common[None,:,:]*beta_exp[:,None,None]

        for d in range(output_dim):
            dL_dthetaL[d] += (dL_dpsi2R_common*psi2[indexD==d].sum(0)).sum()*-beta[d]*beta[d]

        dL_dKmm += dL_dpsi2R_common*output_dim

        logL += -output_dim*logdet_L/2.

        #======================================================================
        # Compute dL_dKmm
        #======================================================================

        # dL_dKmm = dL_dpsi2R - output_dim*backsub_both_sides(Lm, LmInvPsi2LmInvT)/2  # LmInv.T.dot(LmInvPsi2LmInvT).dot(LmInv)/2.

        #======================================================================
        # Compute the Posterior distribution of inducing points p(u|Y)
        #======================================================================

        LLInvLmT = dtrtrs(LL, Lm.T)[0]
        cov = tdot(LLInvLmT.T)

        wd_inv = backsub_both_sides(Lm, np.eye(input_dim) - backsub_both_sides(LL, np.identity(input_dim), transpose='left'), transpose='left')
        post = Posterior(woodbury_inv=wd_inv, woodbury_vector=wv, K=Kmm, mean=None, cov=cov, K_chol=Lm)

        #======================================================================
        # Compute dL_dthetaL for uncertain input and non-heteroscedastic noise
        #======================================================================

        # for d in range(output_dim):
        #     dL_dthetaL[d:d+1] += - beta[d]*beta[d]*(dL_dpsi2R[None,:,:] * psi2[indexD==d]/output_dim).sum()
        # dL_dthetaL += - (dL_dpsi2R[None,:,:] * psi2_sum*D beta*(dL_dpsi2R*psi2).sum()

        #======================================================================
        # Compute dL_dpsi
        #======================================================================

        if not uncertain_inputs:
            dL_dpsi1 += (psi1[:,None,:]*dL_dpsi2).sum(2)*2.

        if uncertain_inputs:
            grad_dict = {'dL_dKmm': dL_dKmm,
                         'dL_dpsi0': dL_dpsi0,
                         'dL_dpsi1': dL_dpsi1,
                         'dL_dpsi2': dL_dpsi2,
                         'dL_dthetaL': dL_dthetaL}
        else:
            grad_dict = {'dL_dKmm': dL_dKmm,
                         'dL_dKdiag': dL_dpsi0,
                         'dL_dKnm': dL_dpsi1,
                         'dL_dthetaL': dL_dthetaL}

        return post, logL, grad_dict
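A quick way to read the linear algebra in the per-output loop above (a sketch, not part of the diff): LmLL = Lm.dot(LL) acts as a Cholesky-style factor of Kmm + Psi2_d, which is why the data fit terms b and v are obtained by triangular solves against it. The check below assumes that GPy's backsub_both_sides(L, X, 'right') returns L^{-1} X L^{-T}, matching how it is used in the code:

# Sanity-check sketch: LmLL = Lm.dot(LL) factors Kmm + Psi2_d.
import numpy as np
from GPy.util.linalg import jitchol, backsub_both_sides

M = 5
A = np.random.randn(M, M); Kmm = A.dot(A.T) + M*np.eye(M)     # SPD stand-in for kern.K(Z)
B = np.random.randn(M, M); Psi2_d = B.dot(B.T)                # SPD stand-in for the weighted psi2 sum

Lm = jitchol(Kmm)
LmInvPsi2LmInvT = backsub_both_sides(Lm, Psi2_d, 'right')     # Lm^{-1} Psi2_d Lm^{-T} (assumed semantics)
LL = jitchol(np.eye(M) + LmInvPsi2LmInvT)
LmLL = Lm.dot(LL)

# Lm LL (Lm LL)^T = Lm (I + Lm^{-1} Psi2_d Lm^{-T}) Lm^T = Kmm + Psi2_d
print(np.allclose(LmLL.dot(LmLL.T), Kmm + Psi2_d))            # expect True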
GPy/inference/latent_function_inference/vardtc_svi_multiout.py (new file, 271 lines)
@@ -0,0 +1,271 @@
#from .posterior import Posterior
from GPy.util.linalg import jitchol, backsub_both_sides, tdot, dtrtrs, dtrtri, pdinv, dpotri
from GPy.util import diag
from GPy.core.parameterization.variational import VariationalPosterior
import numpy as np
from GPy.inference.latent_function_inference import LatentFunctionInference
from GPy.inference.latent_function_inference.posterior import Posterior
log_2_pi = np.log(2*np.pi)


class VarDTC_SVI_Multiout(LatentFunctionInference):
    """
    An object for inference when the likelihood is Gaussian, but we want to do sparse inference.

    The function self.inference returns a Posterior object, which summarizes the posterior.

    For efficiency, we sometimes work with the Cholesky of Y*Y.T. To save repeatedly recomputing this, we cache it.
    """
    const_jitter = 1e-6

    def get_trYYT(self, Y):
        return np.sum(np.square(Y))

    def get_YYTfactor(self, Y):
        N, D = Y.shape
        if (N >= D):
            return Y.view(np.ndarray)
        else:
            return jitchol(tdot(Y))

    def gatherPsiStat(self, kern, X, Z, uncertain_inputs):
        if uncertain_inputs:
            psi0 = kern.psi0(Z, X).sum()
            psi1 = kern.psi1(Z, X)
            psi2 = kern.psi2(Z, X)
        else:
            psi0 = kern.Kdiag(X).sum()
            psi1 = kern.K(X, Z)
            psi2 = tdot(psi1.T)

        return psi0, psi1, psi2

    def inference(self, kern_r, kern_c, Xr, Xc, Zr, Zc, likelihood, Y, qU_mean, qU_var_r, qU_var_c):
        """
        The SVI-VarDTC inference
        """
        N, D, Mr, Mc, Qr, Qc = Y.shape[0], Y.shape[1], Zr.shape[0], Zc.shape[0], Zr.shape[1], Zc.shape[1]

        uncertain_inputs_r = isinstance(Xr, VariationalPosterior)
        uncertain_inputs_c = isinstance(Xc, VariationalPosterior)
        uncertain_outputs = isinstance(Y, VariationalPosterior)

        beta = 1./likelihood.variance

        psi0_r, psi1_r, psi2_r = self.gatherPsiStat(kern_r, Xr, Zr, uncertain_inputs_r)
        psi0_c, psi1_c, psi2_c = self.gatherPsiStat(kern_c, Xc, Zc, uncertain_inputs_c)

        #======================================================================
        # Compute Common Components
        #======================================================================

        Kuu_r = kern_r.K(Zr).copy()
        diag.add(Kuu_r, self.const_jitter)
        Lr = jitchol(Kuu_r)

        Kuu_c = kern_c.K(Zc).copy()
        diag.add(Kuu_c, self.const_jitter)
        Lc = jitchol(Kuu_c)

        mu, Sr, Sc = qU_mean, qU_var_r, qU_var_c
        LSr = jitchol(Sr)
        LSc = jitchol(Sc)

        LcInvMLrInvT = dtrtrs(Lc, dtrtrs(Lr, mu.T)[0].T)[0]
        LcInvPsi2_cLcInvT = backsub_both_sides(Lc, psi2_c, 'right')
        LrInvPsi2_rLrInvT = backsub_both_sides(Lr, psi2_r, 'right')
        LcInvLSc = dtrtrs(Lc, LSc)[0]
        LrInvLSr = dtrtrs(Lr, LSr)[0]
        LcInvScLcInvT = tdot(LcInvLSc)
        LrInvSrLrInvT = tdot(LrInvLSr)
        LcInvPsi1_cT = dtrtrs(Lc, psi1_c.T)[0]
        LrInvPsi1_rT = dtrtrs(Lr, psi1_r.T)[0]

        tr_LrInvPsi2_rLrInvT_LrInvSrLrInvT = (LrInvPsi2_rLrInvT*LrInvSrLrInvT).sum()
        tr_LcInvPsi2_cLcInvT_LcInvScLcInvT = (LcInvPsi2_cLcInvT*LcInvScLcInvT).sum()
        tr_LrInvSrLrInvT = np.square(LrInvLSr).sum()
        tr_LcInvScLcInvT = np.square(LcInvLSc).sum()
        tr_LrInvPsi2_rLrInvT = np.trace(LrInvPsi2_rLrInvT)
        tr_LcInvPsi2_cLcInvT = np.trace(LcInvPsi2_cLcInvT)

        #======================================================================
        # Compute log-likelihood
        #======================================================================

        logL_A = - np.square(Y).sum() \
                 - (LcInvMLrInvT.T.dot(LcInvPsi2_cLcInvT).dot(LcInvMLrInvT)*LrInvPsi2_rLrInvT).sum() \
                 - tr_LrInvPsi2_rLrInvT_LrInvSrLrInvT * tr_LcInvPsi2_cLcInvT_LcInvScLcInvT \
                 + 2 * (Y * LcInvPsi1_cT.T.dot(LcInvMLrInvT).dot(LrInvPsi1_rT)).sum() - psi0_c * psi0_r \
                 + tr_LrInvPsi2_rLrInvT * tr_LcInvPsi2_cLcInvT

        logL = -N*D/2.*(np.log(2.*np.pi)-np.log(beta)) + beta/2.*logL_A \
               - Mc * (np.log(np.diag(Lr)).sum()-np.log(np.diag(LSr)).sum()) - Mr * (np.log(np.diag(Lc)).sum()-np.log(np.diag(LSc)).sum()) \
               - np.square(LcInvMLrInvT).sum()/2. - tr_LrInvSrLrInvT * tr_LcInvScLcInvT/2. + Mr*Mc/2.

        #======================================================================
        # Compute dL_dKuu
        #======================================================================

        tmp = beta* LcInvPsi2_cLcInvT.dot(LcInvMLrInvT).dot(LrInvPsi2_rLrInvT).dot(LcInvMLrInvT.T) \
            + beta* tr_LrInvPsi2_rLrInvT_LrInvSrLrInvT * LcInvPsi2_cLcInvT.dot(LcInvScLcInvT) \
            - beta* LcInvMLrInvT.dot(LrInvPsi1_rT).dot(Y.T).dot(LcInvPsi1_cT.T) \
            - beta/2. * tr_LrInvPsi2_rLrInvT * LcInvPsi2_cLcInvT - Mr/2.*np.eye(Mc) \
            + tdot(LcInvMLrInvT)/2. + tr_LrInvSrLrInvT/2. * LcInvScLcInvT

        dL_dKuu_c = backsub_both_sides(Lc, tmp, 'left')
        dL_dKuu_c += dL_dKuu_c.T
        dL_dKuu_c *= 0.5

        tmp = beta* LcInvMLrInvT.T.dot(LcInvPsi2_cLcInvT).dot(LcInvMLrInvT).dot(LrInvPsi2_rLrInvT) \
            + beta* tr_LcInvPsi2_cLcInvT_LcInvScLcInvT * LrInvPsi2_rLrInvT.dot(LrInvSrLrInvT) \
            - beta* LrInvPsi1_rT.dot(Y.T).dot(LcInvPsi1_cT.T).dot(LcInvMLrInvT) \
            - beta/2. * tr_LcInvPsi2_cLcInvT * LrInvPsi2_rLrInvT - Mc/2.*np.eye(Mr) \
            + tdot(LcInvMLrInvT.T)/2. + tr_LcInvScLcInvT/2. * LrInvSrLrInvT

        dL_dKuu_r = backsub_both_sides(Lr, tmp, 'left')
        dL_dKuu_r += dL_dKuu_r.T
        dL_dKuu_r *= 0.5

        #======================================================================
        # Compute dL_dthetaL
        #======================================================================

        dL_dthetaL = -D*N*beta/2. - logL_A*beta*beta/2.

        #======================================================================
        # Compute dL_dqU
        #======================================================================

        tmp = -beta * LcInvPsi2_cLcInvT.dot(LcInvMLrInvT).dot(LrInvPsi2_rLrInvT) \
              + beta* LcInvPsi1_cT.dot(Y).dot(LrInvPsi1_rT.T) - LcInvMLrInvT

        dL_dqU_mean = dtrtrs(Lc, dtrtrs(Lr, tmp.T, trans=1)[0].T, trans=1)[0]

        LScInv = dtrtri(LSc)
        tmp = -beta/2.*tr_LrInvPsi2_rLrInvT_LrInvSrLrInvT * LcInvPsi2_cLcInvT - tr_LrInvSrLrInvT/2.*np.eye(Mc)
        dL_dqU_var_c = backsub_both_sides(Lc, tmp, 'left') + tdot(LScInv.T) * Mr/2.

        LSrInv = dtrtri(LSr)
        tmp = -beta/2.*tr_LcInvPsi2_cLcInvT_LcInvScLcInvT * LrInvPsi2_rLrInvT - tr_LcInvScLcInvT/2.*np.eye(Mr)
        dL_dqU_var_r = backsub_both_sides(Lr, tmp, 'left') + tdot(LSrInv.T) * Mc/2.

        #======================================================================
        # Compute the Posterior distribution of inducing points p(u|Y)
        #======================================================================

        post = PosteriorMultioutput(LcInvMLrInvT=LcInvMLrInvT, LcInvScLcInvT=LcInvScLcInvT,
                                    LrInvSrLrInvT=LrInvSrLrInvT, Lr=Lr, Lc=Lc, kern_r=kern_r, Xr=Xr, Zr=Zr)

        #======================================================================
        # Compute dL_dpsi
        #======================================================================

        dL_dpsi0_r = - psi0_c * beta/2. * np.ones((D,))
        dL_dpsi0_c = - psi0_r * beta/2. * np.ones((N,))

        dL_dpsi1_c = beta * dtrtrs(Lc, (Y.dot(LrInvPsi1_rT.T).dot(LcInvMLrInvT.T)).T, trans=1)[0].T
        dL_dpsi1_r = beta * dtrtrs(Lr, (Y.T.dot(LcInvPsi1_cT.T).dot(LcInvMLrInvT)).T, trans=1)[0].T

        tmp = beta/2.*(-LcInvMLrInvT.dot(LrInvPsi2_rLrInvT).dot(LcInvMLrInvT.T) - tr_LrInvPsi2_rLrInvT_LrInvSrLrInvT * LcInvScLcInvT
                       + tr_LrInvPsi2_rLrInvT * np.eye(Mc))
        dL_dpsi2_c = backsub_both_sides(Lc, tmp, 'left')
        tmp = beta/2.*(-LcInvMLrInvT.T.dot(LcInvPsi2_cLcInvT).dot(LcInvMLrInvT) - tr_LcInvPsi2_cLcInvT_LcInvScLcInvT * LrInvSrLrInvT
                       + tr_LcInvPsi2_cLcInvT * np.eye(Mr))
        dL_dpsi2_r = backsub_both_sides(Lr, tmp, 'left')

        if not uncertain_inputs_r:
            dL_dpsi1_r += psi1_r.dot(dL_dpsi2_r+dL_dpsi2_r.T)
        if not uncertain_inputs_c:
            dL_dpsi1_c += psi1_c.dot(dL_dpsi2_c+dL_dpsi2_c.T)

        grad_dict = {
            'dL_dthetaL': dL_dthetaL,
            'dL_dqU_mean': dL_dqU_mean,
            'dL_dqU_var_c': dL_dqU_var_c,
            'dL_dqU_var_r': dL_dqU_var_r,
            'dL_dKuu_c': dL_dKuu_c,
            'dL_dKuu_r': dL_dKuu_r,
        }

        if uncertain_inputs_c:
            grad_dict['dL_dpsi0_c'] = dL_dpsi0_c
            grad_dict['dL_dpsi1_c'] = dL_dpsi1_c
            grad_dict['dL_dpsi2_c'] = dL_dpsi2_c
        else:
            grad_dict['dL_dKdiag_c'] = dL_dpsi0_c
            grad_dict['dL_dKfu_c'] = dL_dpsi1_c

        if uncertain_inputs_r:
            grad_dict['dL_dpsi0_r'] = dL_dpsi0_r
            grad_dict['dL_dpsi1_r'] = dL_dpsi1_r
            grad_dict['dL_dpsi2_r'] = dL_dpsi2_r
        else:
            grad_dict['dL_dKdiag_r'] = dL_dpsi0_r
            grad_dict['dL_dKfu_r'] = dL_dpsi1_r

        return post, logL, grad_dict


class PosteriorMultioutput(object):

    def __init__(self, LcInvMLrInvT, LcInvScLcInvT, LrInvSrLrInvT, Lr, Lc, kern_r, Xr, Zr):
        self.LcInvMLrInvT = LcInvMLrInvT
        self.LcInvScLcInvT = LcInvScLcInvT
        self.LrInvSrLrInvT = LrInvSrLrInvT
        self.Lr = Lr
        self.Lc = Lc
        self.kern_r = kern_r
        self.Xr = Xr
        self.Zr = Zr

    def _prepare(self):
        # Note: this method appears to be superseded by _raw_predict, which recomputes
        # these quantities; only self.woodbury_vector is stored here.
        D, Mr, Mc = self.Xr.shape[0], self.Zr.shape[0], self.LcInvMLrInvT.shape[0]
        psi2_r_n = self.kern_r.psi2n(self.Zr, self.Xr)
        psi0_r = self.kern_r.psi0(self.Zr, self.Xr)
        psi1_r = self.kern_r.psi1(self.Zr, self.Xr)

        LrInvPsi1_rT = dtrtrs(self.Lr, psi1_r.T)[0]
        self.woodbury_vector = self.LcInvMLrInvT.dot(LrInvPsi1_rT)

        LrInvPsi2_r_nLrInvT = dtrtrs(self.Lr, np.swapaxes((dtrtrs(self.Lr, psi2_r_n.reshape(D*Mr,Mr).T)[0].T).reshape(D,Mr,Mr),1,2).reshape(D*Mr,Mr).T)[0].T.reshape(D,Mr,Mr)

        tr_LrInvPsi2_r_nLrInvT = LrInvPsi2_r_nLrInvT.reshape(D,Mr*Mr).sum(1)
        tr_LrInvPsi2_r_nLrInvT_LrInvSrLrInvT = LrInvPsi2_r_nLrInvT.reshape(D,Mr*Mr).dot(self.LrInvSrLrInvT.flat)

        tmp = LrInvPsi2_r_nLrInvT - LrInvPsi1_rT.T[:,:,None]*LrInvPsi1_rT.T[:,None,:]
        tmp = np.swapaxes(tmp.reshape(D*Mr,Mr).dot(self.LcInvMLrInvT.T).reshape(D,Mr,Mc), 1,2).reshape(D*Mc,Mr).dot(self.LcInvMLrInvT.T).reshape(D,Mc,Mc)

    def _raw_predict(self, kern, Xnew, pred_var, full_cov=False):

        N = Xnew.shape[0]
        psi1_c = kern.K(Xnew, pred_var)
        psi0_c = kern.Kdiag(Xnew)
        LcInvPsi1_cT = dtrtrs(self.Lc, psi1_c.T)[0]

        D, Mr, Mc = self.Xr.shape[0], self.Zr.shape[0], self.LcInvMLrInvT.shape[0]
        psi2_r_n = self.kern_r.psi2n(self.Zr, self.Xr)
        psi0_r = self.kern_r.psi0(self.Zr, self.Xr)
        psi1_r = self.kern_r.psi1(self.Zr, self.Xr)

        LrInvPsi1_rT = dtrtrs(self.Lr, psi1_r.T)[0]
        woodbury_vector = self.LcInvMLrInvT.dot(LrInvPsi1_rT)

        mu = np.dot(LcInvPsi1_cT.T, woodbury_vector)

        LrInvPsi2_r_nLrInvT = dtrtrs(self.Lr, np.swapaxes((dtrtrs(self.Lr, psi2_r_n.reshape(D*Mr,Mr).T)[0].T).reshape(D,Mr,Mr),1,2).reshape(D*Mr,Mr).T)[0].T.reshape(D,Mr,Mr)

        tr_LrInvPsi2_r_nLrInvT = np.diagonal(LrInvPsi2_r_nLrInvT, axis1=1, axis2=2).sum(1)
        tr_LrInvPsi2_r_nLrInvT_LrInvSrLrInvT = LrInvPsi2_r_nLrInvT.reshape(D,Mr*Mr).dot(self.LrInvSrLrInvT.flat)

        tmp = LrInvPsi2_r_nLrInvT - LrInvPsi1_rT.T[:,:,None]*LrInvPsi1_rT.T[:,None,:]
        tmp = np.swapaxes(tmp.reshape(D*Mr,Mr).dot(self.LcInvMLrInvT.T).reshape(D,Mr,Mc), 1,2).reshape(D*Mc,Mr).dot(self.LcInvMLrInvT.T).reshape(D,Mc,Mc)

        var1 = (tmp.reshape(D*Mc,Mc).dot(LcInvPsi1_cT).reshape(D,Mc,N)*LcInvPsi1_cT[None,:,:]).sum(1).T
        var2 = psi0_c[:,None]*psi0_r[None,:]
        var3 = tr_LrInvPsi2_r_nLrInvT[None,:]*np.square(LcInvPsi1_cT).sum(0)[:,None]
        var4 = tr_LrInvPsi2_r_nLrInvT_LrInvSrLrInvT[None,:]*(self.LcInvScLcInvT.dot(LcInvPsi1_cT)*LcInvPsi1_cT).sum(0)[:,None]
        var = var1+var2-var3+var4
        return mu, var
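One way to read the paired trace terms in logL_A above (a reading inferred from the code, not text from the diff): q(U)'s covariance and the psi2 statistics are Kronecker-structured across the column (input) and row (latent output) spaces, and for Kronecker products the quadratic trace factorizes,

$$\operatorname{tr}\!\left[(K_c\otimes K_r)^{-1}(S_c\otimes S_r)(K_c\otimes K_r)^{-1}(\Psi_2^c\otimes \Psi_2^r)\right] = \operatorname{tr}\!\left[K_c^{-1}S_cK_c^{-1}\Psi_2^c\right]\,\operatorname{tr}\!\left[K_r^{-1}S_rK_r^{-1}\Psi_2^r\right],$$

since $(A\otimes B)(C\otimes D)=(AC)\otimes(BD)$ and $\operatorname{tr}(X\otimes Y)=\operatorname{tr}(X)\operatorname{tr}(Y)$. This product is exactly tr_LcInvPsi2_cLcInvT_LcInvScLcInvT * tr_LrInvPsi2_rLrInvT_LrInvSrLrInvT, evaluated through the Cholesky factors Lc and Lr, which keeps the cost linear in the two factor sizes rather than in their product.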
GPy/inference/latent_function_inference/vardtc_svi_multiout_miss.py (new file, 306 lines)
@@ -0,0 +1,306 @@
#from .posterior import Posterior
from GPy.util.linalg import jitchol, backsub_both_sides, tdot, dtrtrs, dtrtri, pdinv, dpotri
from GPy.util import diag
from GPy.core.parameterization.variational import VariationalPosterior
import numpy as np
from GPy.inference.latent_function_inference import LatentFunctionInference
from GPy.inference.latent_function_inference.posterior import Posterior
from .vardtc_svi_multiout import PosteriorMultioutput
log_2_pi = np.log(2*np.pi)


class VarDTC_SVI_Multiout_Miss(LatentFunctionInference):
    """
    The SVI-VarDTC inference for multioutput GPs with missing data.
    """
    const_jitter = 1e-6

    def get_trYYT(self, Y):
        return np.sum(np.square(Y))

    def get_YYTfactor(self, Y):
        N, D = Y.shape
        if (N >= D):
            return Y.view(np.ndarray)
        else:
            return jitchol(tdot(Y))

    def gatherPsiStat(self, kern, X, Z, uncertain_inputs):
        if uncertain_inputs:
            psi0 = kern.psi0(Z, X)
            psi1 = kern.psi1(Z, X)
            psi2 = kern.psi2n(Z, X)
        else:
            psi0 = kern.Kdiag(X)
            psi1 = kern.K(X, Z)
            psi2 = psi1[:,:,None]*psi1[:,None,:]

        return psi0, psi1, psi2

    def _init_grad_dict(self, N, D, Mr, Mc):
        grad_dict = {
            'dL_dthetaL': np.zeros(D),
            'dL_dqU_mean': np.zeros((Mc,Mr)),
            'dL_dqU_var_c': np.zeros((Mc,Mc)),
            'dL_dqU_var_r': np.zeros((Mr,Mr)),
            'dL_dKuu_c': np.zeros((Mc,Mc)),
            'dL_dKuu_r': np.zeros((Mr,Mr)),
            'dL_dpsi0_c': np.zeros(N),
            'dL_dpsi1_c': np.zeros((N,Mc)),
            'dL_dpsi2_c': np.zeros((N,Mc,Mc)),
            'dL_dpsi0_r': np.zeros(D),
            'dL_dpsi1_r': np.zeros((D,Mr)),
            'dL_dpsi2_r': np.zeros((D,Mr,Mr)),
        }
        return grad_dict

    def inference_d(self, d, beta, Y, indexD, grad_dict, mid_res, uncertain_inputs_r, uncertain_inputs_c, Mr, Mc):

        idx_d = indexD==d
        Y = Y[idx_d]
        N, D = Y.shape[0], 1
        beta = beta[d]

        psi0_r, psi1_r, psi2_r = mid_res['psi0_r'], mid_res['psi1_r'], mid_res['psi2_r']
        psi0_c, psi1_c, psi2_c = mid_res['psi0_c'], mid_res['psi1_c'], mid_res['psi2_c']
        psi0_r, psi1_r, psi2_r = psi0_r[d], psi1_r[d:d+1], psi2_r[d]
        psi0_c, psi1_c, psi2_c = psi0_c[idx_d].sum(), psi1_c[idx_d], psi2_c[idx_d].sum(0)

        Lr = mid_res['Lr']
        Lc = mid_res['Lc']
        LcInvMLrInvT = mid_res['LcInvMLrInvT']
        LcInvScLcInvT = mid_res['LcInvScLcInvT']
        LrInvSrLrInvT = mid_res['LrInvSrLrInvT']

        LcInvPsi2_cLcInvT = backsub_both_sides(Lc, psi2_c, 'right')
        LrInvPsi2_rLrInvT = backsub_both_sides(Lr, psi2_r, 'right')
        LcInvPsi1_cT = dtrtrs(Lc, psi1_c.T)[0]
        LrInvPsi1_rT = dtrtrs(Lr, psi1_r.T)[0]

        tr_LrInvPsi2_rLrInvT_LrInvSrLrInvT = (LrInvPsi2_rLrInvT*LrInvSrLrInvT).sum()
        tr_LcInvPsi2_cLcInvT_LcInvScLcInvT = (LcInvPsi2_cLcInvT*LcInvScLcInvT).sum()
        tr_LrInvPsi2_rLrInvT = np.trace(LrInvPsi2_rLrInvT)
        tr_LcInvPsi2_cLcInvT = np.trace(LcInvPsi2_cLcInvT)

        logL_A = - np.square(Y).sum() \
                 - (LcInvMLrInvT.T.dot(LcInvPsi2_cLcInvT).dot(LcInvMLrInvT)*LrInvPsi2_rLrInvT).sum() \
                 - tr_LrInvPsi2_rLrInvT_LrInvSrLrInvT * tr_LcInvPsi2_cLcInvT_LcInvScLcInvT \
                 + 2 * (Y * LcInvPsi1_cT.T.dot(LcInvMLrInvT).dot(LrInvPsi1_rT)).sum() - psi0_c * psi0_r \
                 + tr_LrInvPsi2_rLrInvT * tr_LcInvPsi2_cLcInvT

        logL = -N*D/2.*(np.log(2.*np.pi)-np.log(beta)) + beta/2.*logL_A

        # ======= Gradients =====

        tmp = beta* LcInvPsi2_cLcInvT.dot(LcInvMLrInvT).dot(LrInvPsi2_rLrInvT).dot(LcInvMLrInvT.T) \
            + beta* tr_LrInvPsi2_rLrInvT_LrInvSrLrInvT * LcInvPsi2_cLcInvT.dot(LcInvScLcInvT) \
            - beta* LcInvMLrInvT.dot(LrInvPsi1_rT).dot(Y.T).dot(LcInvPsi1_cT.T) \
            - beta/2. * tr_LrInvPsi2_rLrInvT * LcInvPsi2_cLcInvT

        dL_dKuu_c = backsub_both_sides(Lc, tmp, 'left')
        dL_dKuu_c += dL_dKuu_c.T
        dL_dKuu_c *= 0.5

        tmp = beta* LcInvMLrInvT.T.dot(LcInvPsi2_cLcInvT).dot(LcInvMLrInvT).dot(LrInvPsi2_rLrInvT) \
            + beta* tr_LcInvPsi2_cLcInvT_LcInvScLcInvT * LrInvPsi2_rLrInvT.dot(LrInvSrLrInvT) \
            - beta* LrInvPsi1_rT.dot(Y.T).dot(LcInvPsi1_cT.T).dot(LcInvMLrInvT) \
            - beta/2. * tr_LcInvPsi2_cLcInvT * LrInvPsi2_rLrInvT

        dL_dKuu_r = backsub_both_sides(Lr, tmp, 'left')
        dL_dKuu_r += dL_dKuu_r.T
        dL_dKuu_r *= 0.5

        #======================================================================
        # Compute dL_dthetaL
        #======================================================================

        dL_dthetaL = -D*N*beta/2. - logL_A*beta*beta/2.

        #======================================================================
        # Compute dL_dqU
        #======================================================================

        tmp = -beta * LcInvPsi2_cLcInvT.dot(LcInvMLrInvT).dot(LrInvPsi2_rLrInvT) \
              + beta* LcInvPsi1_cT.dot(Y).dot(LrInvPsi1_rT.T)

        dL_dqU_mean = dtrtrs(Lc, dtrtrs(Lr, tmp.T, trans=1)[0].T, trans=1)[0]

        tmp = -beta/2.*tr_LrInvPsi2_rLrInvT_LrInvSrLrInvT * LcInvPsi2_cLcInvT
        dL_dqU_var_c = backsub_both_sides(Lc, tmp, 'left')

        tmp = -beta/2.*tr_LcInvPsi2_cLcInvT_LcInvScLcInvT * LrInvPsi2_rLrInvT
        dL_dqU_var_r = backsub_both_sides(Lr, tmp, 'left')

        #======================================================================
        # Compute dL_dpsi
        #======================================================================

        dL_dpsi0_r = - psi0_c * beta/2. * np.ones((D,))
        dL_dpsi0_c = - psi0_r * beta/2. * np.ones((N,))

        dL_dpsi1_c = beta * dtrtrs(Lc, (Y.dot(LrInvPsi1_rT.T).dot(LcInvMLrInvT.T)).T, trans=1)[0].T
        dL_dpsi1_r = beta * dtrtrs(Lr, (Y.T.dot(LcInvPsi1_cT.T).dot(LcInvMLrInvT)).T, trans=1)[0].T

        tmp = beta/2.*(-LcInvMLrInvT.dot(LrInvPsi2_rLrInvT).dot(LcInvMLrInvT.T) - tr_LrInvPsi2_rLrInvT_LrInvSrLrInvT * LcInvScLcInvT
                       + tr_LrInvPsi2_rLrInvT * np.eye(Mc))
        dL_dpsi2_c = backsub_both_sides(Lc, tmp, 'left')
        tmp = beta/2.*(-LcInvMLrInvT.T.dot(LcInvPsi2_cLcInvT).dot(LcInvMLrInvT) - tr_LcInvPsi2_cLcInvT_LcInvScLcInvT * LrInvSrLrInvT
                       + tr_LcInvPsi2_cLcInvT * np.eye(Mr))
        dL_dpsi2_r = backsub_both_sides(Lr, tmp, 'left')

        grad_dict['dL_dthetaL'][d:d+1] = dL_dthetaL
        grad_dict['dL_dqU_mean'] += dL_dqU_mean
        grad_dict['dL_dqU_var_c'] += dL_dqU_var_c
        grad_dict['dL_dqU_var_r'] += dL_dqU_var_r
        grad_dict['dL_dKuu_c'] += dL_dKuu_c
        grad_dict['dL_dKuu_r'] += dL_dKuu_r

        # if not uncertain_inputs_r:
        #     dL_dpsi1_r += (dL_dpsi2_r * psi1_r[:,:,None]).sum(1) + (dL_dpsi2_r * psi1_r[:,None,:]).sum(2)
        # if not uncertain_inputs_c:
        #     dL_dpsi1_c += (dL_dpsi2_c * psi1_c[:,:,None]).sum(1) + (dL_dpsi2_c * psi1_c[:,None,:]).sum(2)

        if not uncertain_inputs_r:
            dL_dpsi1_r += psi1_r.dot(dL_dpsi2_r+dL_dpsi2_r.T)
        if not uncertain_inputs_c:
            dL_dpsi1_c += psi1_c.dot(dL_dpsi2_c+dL_dpsi2_c.T)

        grad_dict['dL_dpsi0_c'][idx_d] += dL_dpsi0_c
        grad_dict['dL_dpsi1_c'][idx_d] += dL_dpsi1_c
        grad_dict['dL_dpsi2_c'][idx_d] += dL_dpsi2_c

        grad_dict['dL_dpsi0_r'][d:d+1] += dL_dpsi0_r
        grad_dict['dL_dpsi1_r'][d:d+1] += dL_dpsi1_r
        grad_dict['dL_dpsi2_r'][d] += dL_dpsi2_r

        return logL

    def inference(self, kern_r, kern_c, Xr, Xc, Zr, Zc, likelihood, Y, qU_mean, qU_var_r, qU_var_c, indexD, output_dim):
        """
        The SVI-VarDTC inference
        """
        N, D, Mr, Mc, Qr, Qc = Y.shape[0], output_dim, Zr.shape[0], Zc.shape[0], Zr.shape[1], Zc.shape[1]

        uncertain_inputs_r = isinstance(Xr, VariationalPosterior)
        uncertain_inputs_c = isinstance(Xc, VariationalPosterior)
        uncertain_outputs = isinstance(Y, VariationalPosterior)

        grad_dict = self._init_grad_dict(N, D, Mr, Mc)

        beta = 1./likelihood.variance
        if len(beta)==1:
            beta = np.zeros(D)+beta

        psi0_r, psi1_r, psi2_r = self.gatherPsiStat(kern_r, Xr, Zr, uncertain_inputs_r)
        psi0_c, psi1_c, psi2_c = self.gatherPsiStat(kern_c, Xc, Zc, uncertain_inputs_c)

        #======================================================================
        # Compute Common Components
        #======================================================================

        Kuu_r = kern_r.K(Zr).copy()
        diag.add(Kuu_r, self.const_jitter)
        Lr = jitchol(Kuu_r)

        Kuu_c = kern_c.K(Zc).copy()
        diag.add(Kuu_c, self.const_jitter)
        Lc = jitchol(Kuu_c)

        mu, Sr, Sc = qU_mean, qU_var_r, qU_var_c
        LSr = jitchol(Sr)
        LSc = jitchol(Sc)

        LcInvMLrInvT = dtrtrs(Lc, dtrtrs(Lr, mu.T)[0].T)[0]
        LcInvLSc = dtrtrs(Lc, LSc)[0]
        LrInvLSr = dtrtrs(Lr, LSr)[0]
        LcInvScLcInvT = tdot(LcInvLSc)
        LrInvSrLrInvT = tdot(LrInvLSr)
        tr_LrInvSrLrInvT = np.square(LrInvLSr).sum()
        tr_LcInvScLcInvT = np.square(LcInvLSc).sum()

        mid_res = {
            'psi0_r': psi0_r,
            'psi1_r': psi1_r,
            'psi2_r': psi2_r,
            'psi0_c': psi0_c,
            'psi1_c': psi1_c,
            'psi2_c': psi2_c,
            'Lr': Lr,
            'Lc': Lc,
            'LcInvMLrInvT': LcInvMLrInvT,
            'LcInvScLcInvT': LcInvScLcInvT,
            'LrInvSrLrInvT': LrInvSrLrInvT,
        }

        #======================================================================
        # Compute log-likelihood
        #======================================================================

        logL = 0.
        for d in range(D):
            logL += self.inference_d(d, beta, Y, indexD, grad_dict, mid_res, uncertain_inputs_r, uncertain_inputs_c, Mr, Mc)

        logL += -Mc * (np.log(np.diag(Lr)).sum()-np.log(np.diag(LSr)).sum()) - Mr * (np.log(np.diag(Lc)).sum()-np.log(np.diag(LSc)).sum()) \
                - np.square(LcInvMLrInvT).sum()/2. - tr_LrInvSrLrInvT * tr_LcInvScLcInvT/2. + Mr*Mc/2.

        #======================================================================
        # Compute dL_dKuu
        #======================================================================

        tmp = tdot(LcInvMLrInvT)/2. + tr_LrInvSrLrInvT/2. * LcInvScLcInvT - Mr/2.*np.eye(Mc)

        dL_dKuu_c = backsub_both_sides(Lc, tmp, 'left')
        dL_dKuu_c += dL_dKuu_c.T
        dL_dKuu_c *= 0.5

        tmp = tdot(LcInvMLrInvT.T)/2. + tr_LcInvScLcInvT/2. * LrInvSrLrInvT - Mc/2.*np.eye(Mr)

        dL_dKuu_r = backsub_both_sides(Lr, tmp, 'left')
        dL_dKuu_r += dL_dKuu_r.T
        dL_dKuu_r *= 0.5

        #======================================================================
        # Compute dL_dqU
        #======================================================================

        tmp = - LcInvMLrInvT
        dL_dqU_mean = dtrtrs(Lc, dtrtrs(Lr, tmp.T, trans=1)[0].T, trans=1)[0]

        LScInv = dtrtri(LSc)
        tmp = -tr_LrInvSrLrInvT/2.*np.eye(Mc)
        dL_dqU_var_c = backsub_both_sides(Lc, tmp, 'left') + tdot(LScInv.T) * Mr/2.

        LSrInv = dtrtri(LSr)
        tmp = -tr_LcInvScLcInvT/2.*np.eye(Mr)
        dL_dqU_var_r = backsub_both_sides(Lr, tmp, 'left') + tdot(LSrInv.T) * Mc/2.

        #======================================================================
        # Compute the Posterior distribution of inducing points p(u|Y)
        #======================================================================

        post = PosteriorMultioutput(LcInvMLrInvT=LcInvMLrInvT, LcInvScLcInvT=LcInvScLcInvT,
                                    LrInvSrLrInvT=LrInvSrLrInvT, Lr=Lr, Lc=Lc, kern_r=kern_r, Xr=Xr, Zr=Zr)

        #======================================================================
        # Compute dL_dpsi
        #======================================================================

        grad_dict['dL_dqU_mean'] += dL_dqU_mean
        grad_dict['dL_dqU_var_c'] += dL_dqU_var_c
        grad_dict['dL_dqU_var_r'] += dL_dqU_var_r
        grad_dict['dL_dKuu_c'] += dL_dKuu_c
        grad_dict['dL_dKuu_r'] += dL_dKuu_r

        if not uncertain_inputs_c:
            grad_dict['dL_dKdiag_c'] = grad_dict['dL_dpsi0_c']
            grad_dict['dL_dKfu_c'] = grad_dict['dL_dpsi1_c']

        if not uncertain_inputs_r:
            grad_dict['dL_dKdiag_r'] = grad_dict['dL_dpsi0_r']
            grad_dict['dL_dKfu_r'] = grad_dict['dL_dpsi1_r']

        return post, logL, grad_dict
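The missing-data variant drives everything off the indexD array: each scalar observation in Y is tagged with the output it belongs to, and inference_d masks out one output at a time while accumulating into the shared grad_dict. A small illustrative sketch of that layout (hypothetical data, not from the diff):

# Illustrative sketch of the indexD layout consumed by inference_d (hypothetical data).
# Outputs may have different numbers of observations at different inputs.
import numpy as np

X = np.array([[0.1], [0.4], [0.2], [0.9], [0.5]])   # 5 scalar observations in total
Y = np.array([[1.0], [0.3], [-0.2], [0.7], [0.1]])
indexD = np.array([0, 0, 1, 1, 1])                  # first two rows belong to output 0, the rest to output 1

for d in range(int(np.max(indexD)) + 1):
    idx_d = indexD == d                             # boolean mask, exactly as in inference_d
    print(d, X[idx_d].ravel(), Y[idx_d].ravel())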
GPy/models/__init__.py
@@ -27,3 +27,5 @@ from .state_space_model import StateSpace
 from .ibp_lfm import IBPLFM
 from .gp_offset_regression import GPOffsetRegression
 from .gp_grid_regression import GPRegressionGrid
+from .gp_multiout_regression import GPMultioutRegression
+from .gp_multiout_regression_md import GPMultioutRegressionMD
GPy/models/gp_multiout_regression.py (new file, 175 lines)
@@ -0,0 +1,175 @@
# Copyright (c) 2017 Zhenwen Dai
# Licensed under the BSD 3-clause license (see LICENSE.txt)

import numpy as np
from ..core import SparseGP
from .. import likelihoods
from .. import kern
from .. import util
from GPy.core.parameterization.variational import NormalPosterior, NormalPrior
from ..core.parameterization.param import Param
from paramz.transformations import Logexp
from ..util.linalg import tdot


class GPMultioutRegression(SparseGP):
    """
    Gaussian Process model for scalable multioutput regression

    This is a thin wrapper around the SparseGP class, with a set of sensible defaults.

    :param X: input observations
    :type X: numpy.ndarray
    :param Y: output observations, one column per output dimension
    :type Y: numpy.ndarray
    :param int Xr_dim: dimensionality of the latent space in which the output dimensions are embedded
    :param kernel: kernel for the input space, defaults to RBF
    :type kernel: None | GPy.kern.Kern
    :param kernel_row: kernel for the latent space of outputs, defaults to RBF
    :type kernel_row: None | GPy.kern.Kern
    :param Z: inducing inputs for the input space
    :param Z_row: inducing inputs for the latent space of outputs
    :param X_row: initial mean of the variational posterior over the latent positions of the outputs
    :param Xvariance_row: initial variance of the variational posterior over the latent positions of the outputs
    :param num_inducing: numbers of inducing points (Mc, Mr) for the input space and the latent space
    :type num_inducing: (int, int)
    :param qU_var_r_W_dim: rank of the low-rank part of the row covariance of q(U)
    :param qU_var_c_W_dim: rank of the low-rank part of the column covariance of q(U)
    :param str init: initialization method, 'GP' (initialize by sequentially fitting a sparse GP and a Bayesian GP-LVM) or 'rand' (random)
    :param str name: model name
    """
    def __init__(self, X, Y, Xr_dim, kernel=None, kernel_row=None, likelihood=None, Z=None, Z_row=None, X_row=None, Xvariance_row=None, num_inducing=(10,10), qU_var_r_W_dim=None, qU_var_c_W_dim=None, init='GP', name='GPMR'):

        # Kernel
        if kernel is None:
            kernel = kern.RBF(X.shape[1])

        if kernel_row is None:
            kernel_row = kern.RBF(Xr_dim, name='kern_row')

        if init=='GP':
            from . import SparseGPRegression, BayesianGPLVM
            from ..util.linalg import jitchol
            Mc, Mr = num_inducing
            print('Initializing with GP...')
            print('Fit Sparse GP...')
            m_sgp = SparseGPRegression(X, Y, kernel=kernel.copy(), num_inducing=Mc)
            m_sgp.likelihood.variance[:] = Y.var()*0.01
            m_sgp.optimize(max_iters=1000)
            print('Fit BGPLVM...')
            m_lvm = BayesianGPLVM(m_sgp.posterior.mean.copy().T, Xr_dim, kernel=kernel_row.copy(), num_inducing=Mr)
            m_lvm.likelihood.variance[:] = m_lvm.Y.var()*0.01
            m_lvm.optimize(max_iters=10000)

            kernel[:] = m_sgp.kern.param_array.copy()
            kernel.variance[:] = np.sqrt(kernel.variance)
            Z = m_sgp.Z.values.copy()
            kernel_row[:] = m_lvm.kern.param_array.copy()
            kernel_row.variance[:] = np.sqrt(kernel_row.variance)
            Z_row = m_lvm.Z.values.copy()
            X_row = m_lvm.X.mean.values.copy()
            Xvariance_row = m_lvm.X.variance.values

            qU_mean = m_lvm.posterior.mean.T.copy()
            qU_var_col_W = jitchol(m_sgp.posterior.covariance)
            qU_var_col_diag = np.full(Mc, 1e-5)
            qU_var_row_W = jitchol(m_lvm.posterior.covariance)
            qU_var_row_diag = np.full(Mr, 1e-5)
            print('Done.')
        else:
            qU_mean = np.zeros(num_inducing)
            qU_var_col_W = np.random.randn(num_inducing[0], num_inducing[0] if qU_var_c_W_dim is None else qU_var_c_W_dim)*0.01
            qU_var_col_diag = np.full(num_inducing[0], 1e-5)
            qU_var_row_W = np.random.randn(num_inducing[1], num_inducing[1] if qU_var_r_W_dim is None else qU_var_r_W_dim)*0.01
            qU_var_row_diag = np.full(num_inducing[1], 1e-5)

        if X_row is None:
            u, s, v = np.linalg.svd(Y)
            X_row = Y.T.dot(u[:,:Xr_dim])  # *np.sqrt(s)[:Xr_dim])
            X_row = X_row/X_row.std(0)
        if Xvariance_row is None:
            Xvariance_row = np.ones((Y.shape[1], Xr_dim))*0.0001
        if Z is None:
            Z = X[np.random.permutation(X.shape[0])[:num_inducing[0]]].copy()
        if Z_row is None:
            Z_row = X_row[np.random.permutation(X_row.shape[0])[:num_inducing[1]]].copy()

        self.kern_row = kernel_row
        self.X_row = NormalPosterior(X_row, Xvariance_row, name='Xr')
        self.Z_row = Param('Zr', Z_row)
        self.variational_prior_row = NormalPrior()

        self.qU_mean = Param('qU_mean', qU_mean)
        self.qU_var_c_W = Param('qU_var_col_W', qU_var_col_W)
        self.qU_var_c_diag = Param('qU_var_col_diag', qU_var_col_diag, Logexp())
        self.qU_var_r_W = Param('qU_var_row_W', qU_var_row_W)
        self.qU_var_r_diag = Param('qU_var_row_diag', qU_var_row_diag, Logexp())

        # Likelihood
        likelihood = likelihoods.Gaussian(variance=np.var(Y)*0.01)
        from ..inference.latent_function_inference import VarDTC_SVI_Multiout
        inference_method = VarDTC_SVI_Multiout()

        super(GPMultioutRegression, self).__init__(X, Y, Z, kernel, likelihood=likelihood,
                                                   name=name, inference_method=inference_method)

        self.link_parameters(self.kern_row, self.X_row, self.Z_row, self.qU_mean, self.qU_var_c_W, self.qU_var_c_diag, self.qU_var_r_W, self.qU_var_r_diag)

        self._log_marginal_likelihood = np.nan

    def parameters_changed(self):
        qU_var_c = tdot(self.qU_var_c_W) + np.diag(self.qU_var_c_diag)
        qU_var_r = tdot(self.qU_var_r_W) + np.diag(self.qU_var_r_diag)
        self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern_row, self.kern, self.X_row, self.X, self.Z_row, self.Z, self.likelihood, self.Y, self.qU_mean, qU_var_r, qU_var_c)

        self.likelihood.update_gradients(self.grad_dict['dL_dthetaL'])
        self.qU_mean.gradient[:] = self.grad_dict['dL_dqU_mean']
        self.qU_var_c_diag.gradient[:] = np.diag(self.grad_dict['dL_dqU_var_c'])
        self.qU_var_c_W.gradient[:] = (self.grad_dict['dL_dqU_var_c']+self.grad_dict['dL_dqU_var_c'].T).dot(self.qU_var_c_W)
        self.qU_var_r_diag.gradient[:] = np.diag(self.grad_dict['dL_dqU_var_r'])
        self.qU_var_r_W.gradient[:] = (self.grad_dict['dL_dqU_var_r']+self.grad_dict['dL_dqU_var_r'].T).dot(self.qU_var_r_W)

        self.kern.update_gradients_diag(self.grad_dict['dL_dKdiag_c'], self.X)
        kerngrad = self.kern.gradient.copy()
        self.kern.update_gradients_full(self.grad_dict['dL_dKfu_c'], self.X, self.Z)
        kerngrad += self.kern.gradient
        self.kern.update_gradients_full(self.grad_dict['dL_dKuu_c'], self.Z, None)
        self.kern.gradient += kerngrad
        # gradients wrt Z
        self.Z.gradient = self.kern.gradients_X(self.grad_dict['dL_dKuu_c'], self.Z)
        self.Z.gradient += self.kern.gradients_X(self.grad_dict['dL_dKfu_c'].T, self.Z, self.X)

        # gradients wrt the row kernel
        self.kern_row.update_gradients_full(self.grad_dict['dL_dKuu_r'], self.Z_row, None)
        kerngrad = self.kern_row.gradient.copy()
        self.kern_row.update_gradients_expectations(variational_posterior=self.X_row,
                                                    Z=self.Z_row,
                                                    dL_dpsi0=self.grad_dict['dL_dpsi0_r'],
                                                    dL_dpsi1=self.grad_dict['dL_dpsi1_r'],
                                                    dL_dpsi2=self.grad_dict['dL_dpsi2_r'])
        self.kern_row.gradient += kerngrad

        # gradients wrt Z_row
        self.Z_row.gradient = self.kern_row.gradients_X(self.grad_dict['dL_dKuu_r'], self.Z_row)
        self.Z_row.gradient += self.kern_row.gradients_Z_expectations(
            self.grad_dict['dL_dpsi0_r'],
            self.grad_dict['dL_dpsi1_r'],
            self.grad_dict['dL_dpsi2_r'],
            Z=self.Z_row,
            variational_posterior=self.X_row)

        self._log_marginal_likelihood -= self.variational_prior_row.KL_divergence(self.X_row)

        self.X_row.mean.gradient, self.X_row.variance.gradient = self.kern_row.gradients_qX_expectations(
            variational_posterior=self.X_row,
            Z=self.Z_row,
            dL_dpsi0=self.grad_dict['dL_dpsi0_r'],
            dL_dpsi1=self.grad_dict['dL_dpsi1_r'],
            dL_dpsi2=self.grad_dict['dL_dpsi2_r'])

        self.variational_prior_row.update_gradients_KL(self.X_row)

    def optimize_auto(self, max_iters=10000, verbose=True):
        # Stage 1: optimize the variational parameters with the kernels and inducing inputs fixed.
        # ('Zr' and 'Xr' are the parameter names of self.Z_row and self.X_row.)
        self.Z.fix(warning=False)
        self.kern.fix(warning=False)
        self.kern_row.fix(warning=False)
        self.Zr.fix(warning=False)
        self.Xr.fix(warning=False)
        self.optimize(max_iters=int(0.1*max_iters), messages=verbose)
        # Stage 2: free everything and optimize jointly.
        self.unfix()
        self.optimize(max_iters=max_iters, messages=verbose)
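In parameters_changed above, the covariances of q(U) are rebuilt on every update as qU_var = W W^T + diag(d), with d kept positive by the Logexp transform. The sketch below (not part of the diff) checks the property that motivates this parameterization: the result is symmetric positive definite for any unconstrained W, so q(U) stays a valid Gaussian during optimization. tdot and jitchol are the GPy helpers already imported in the files above.

# Sketch: low-rank-plus-diagonal covariance parameterization, as used in parameters_changed.
import numpy as np
from GPy.util.linalg import tdot, jitchol

M, R = 10, 3
W = np.random.randn(M, R)          # low-rank factor, like qU_var_c_W
d = np.full(M, 1e-5)               # positive diagonal, like qU_var_c_diag
S = tdot(W) + np.diag(d)           # tdot(W) computes W.dot(W.T)
jitchol(S)                         # succeeds: S admits a Cholesky factor
print(np.allclose(S, S.T))         # expect True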
GPy/models/gp_multiout_regression_md.py (new file, 173 lines)
@@ -0,0 +1,173 @@
# Copyright (c) 2017 Zhenwen Dai
# Licensed under the BSD 3-clause license (see LICENSE.txt)

import numpy as np
from ..core import SparseGP
from .. import likelihoods
from .. import kern
from .. import util
from GPy.core.parameterization.variational import NormalPosterior, NormalPrior
from ..core.parameterization.param import Param
from paramz.transformations import Logexp
from ..util.linalg import tdot
from .sparse_gp_regression_md import SparseGPRegressionMD

class GPMultioutRegressionMD(SparseGP):
    """
    Gaussian Process model for scalable multioutput regression with missing data

    This is a thin wrapper around the SparseGP class, with a set of sensible defaults.
    Each observation in Y is a scalar belonging to one output dimension; the integer
    array indexD gives the output index of each observation.
    """
    def __init__(self, X, Y, indexD, Xr_dim, kernel=None, kernel_row=None, likelihood=None, Z=None, Z_row=None, X_row=None, Xvariance_row=None, num_inducing=(10,10), qU_var_r_W_dim=None, qU_var_c_W_dim=None, init='GP', heter_noise=False, name='GPMR'):

        assert len(Y.shape)==1 or Y.shape[1]==1

        self.output_dim = int(np.max(indexD))+1
        self.heter_noise = heter_noise
        self.indexD = indexD

        # Kernel
        if kernel is None:
            kernel = kern.RBF(X.shape[1])
        if kernel_row is None:
            kernel_row = kern.RBF(Xr_dim, name='kern_row')

        if init=='GP':
            from . import SparseGPRegression, BayesianGPLVM
            from ..util.linalg import jitchol
            Mc, Mr = num_inducing
            print('Initializing with GP...')
            print('Fit Sparse GP...')
            m_sgp = SparseGPRegressionMD(X, Y, indexD, kernel=kernel.copy(), num_inducing=Mc)
            m_sgp.likelihood.variance[:] = Y.var()*0.01
            m_sgp.optimize(max_iters=1000)
            print('Fit BGPLVM...')
            m_lvm = BayesianGPLVM(m_sgp.posterior.mean.copy().T, Xr_dim, kernel=kernel_row.copy(), num_inducing=Mr)
            m_lvm.likelihood.variance[:] = m_lvm.Y.var()*0.01
            m_lvm.optimize(max_iters=10000)

            kernel[:] = m_sgp.kern.param_array.copy()
            kernel.variance[:] = np.sqrt(kernel.variance)
            Z = m_sgp.Z.values.copy()
            kernel_row[:] = m_lvm.kern.param_array.copy()
            kernel_row.variance[:] = np.sqrt(kernel_row.variance)
            Z_row = m_lvm.Z.values.copy()
            X_row = m_lvm.X.mean.values.copy()
            Xvariance_row = m_lvm.X.variance.values

            qU_mean = m_lvm.posterior.mean.T.copy()
            qU_var_col_W = jitchol(m_sgp.posterior.covariance)
            qU_var_col_diag = np.full(Mc, 1e-5)
            qU_var_row_W = jitchol(m_lvm.posterior.covariance)
            qU_var_row_diag = np.full(Mr, 1e-5)
            print('Done.')
        else:
            qU_mean = np.zeros(num_inducing)
            qU_var_col_W = np.random.randn(num_inducing[0], num_inducing[0] if qU_var_c_W_dim is None else qU_var_c_W_dim)*0.01
            qU_var_col_diag = np.full(num_inducing[0], 1e-5)
            qU_var_row_W = np.random.randn(num_inducing[1], num_inducing[1] if qU_var_r_W_dim is None else qU_var_r_W_dim)*0.01
            qU_var_row_diag = np.full(num_inducing[1], 1e-5)

        if Z is None:
            Z = X[np.random.permutation(X.shape[0])[:num_inducing[0]]].copy()
        if X_row is None:
            X_row = np.random.randn(self.output_dim, Xr_dim)
        if Xvariance_row is None:
            Xvariance_row = np.ones((self.output_dim, Xr_dim))*0.0001
        if Z_row is None:
            Z_row = X_row[np.random.permutation(X_row.shape[0])[:num_inducing[1]]].copy()

        self.kern_row = kernel_row
        self.X_row = NormalPosterior(X_row, Xvariance_row, name='Xr')
        self.Z_row = Param('Zr', Z_row)
        self.variational_prior_row = NormalPrior()

        self.qU_mean = Param('qU_mean', qU_mean)
        self.qU_var_c_W = Param('qU_var_col_W', qU_var_col_W)
        self.qU_var_c_diag = Param('qU_var_col_diag', qU_var_col_diag, Logexp())
        self.qU_var_r_W = Param('qU_var_row_W', qU_var_row_W)
        self.qU_var_r_diag = Param('qU_var_row_diag', qU_var_row_diag, Logexp())

        # Likelihood
        if heter_noise:
            likelihood = likelihoods.Gaussian(variance=np.array([np.var(Y[indexD==d]) for d in range(self.output_dim)])*0.01)
        else:
            likelihood = likelihoods.Gaussian(variance=np.var(Y)*0.01)
        from ..inference.latent_function_inference.vardtc_svi_multiout_miss import VarDTC_SVI_Multiout_Miss
        inference_method = VarDTC_SVI_Multiout_Miss()

        super(GPMultioutRegressionMD, self).__init__(X, Y, Z, kernel, likelihood=likelihood,
                                                     name=name, inference_method=inference_method)
        self.output_dim = int(np.max(indexD))+1

        self.link_parameters(self.kern_row, self.X_row, self.Z_row, self.qU_mean, self.qU_var_c_W, self.qU_var_c_diag, self.qU_var_r_W, self.qU_var_r_diag)

        self._log_marginal_likelihood = np.nan

    def parameters_changed(self):
        qU_var_c = tdot(self.qU_var_c_W) + np.diag(self.qU_var_c_diag)
        qU_var_r = tdot(self.qU_var_r_W) + np.diag(self.qU_var_r_diag)
        self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern_row, self.kern, self.X_row, self.X, self.Z_row, self.Z, self.likelihood, self.Y, self.qU_mean, qU_var_r, qU_var_c, self.indexD, self.output_dim)

        if self.heter_noise:
            self.likelihood.update_gradients(self.grad_dict['dL_dthetaL'])
        else:
            self.likelihood.update_gradients(self.grad_dict['dL_dthetaL'].sum())
        self.qU_mean.gradient[:] = self.grad_dict['dL_dqU_mean']
        self.qU_var_c_diag.gradient[:] = np.diag(self.grad_dict['dL_dqU_var_c'])
        self.qU_var_c_W.gradient[:] = (self.grad_dict['dL_dqU_var_c']+self.grad_dict['dL_dqU_var_c'].T).dot(self.qU_var_c_W)
        self.qU_var_r_diag.gradient[:] = np.diag(self.grad_dict['dL_dqU_var_r'])
        self.qU_var_r_W.gradient[:] = (self.grad_dict['dL_dqU_var_r']+self.grad_dict['dL_dqU_var_r'].T).dot(self.qU_var_r_W)

        self.kern.update_gradients_diag(self.grad_dict['dL_dKdiag_c'], self.X)
        kerngrad = self.kern.gradient.copy()
        self.kern.update_gradients_full(self.grad_dict['dL_dKfu_c'], self.X, self.Z)
        kerngrad += self.kern.gradient
        self.kern.update_gradients_full(self.grad_dict['dL_dKuu_c'], self.Z, None)
        self.kern.gradient += kerngrad
        # gradients wrt Z
        self.Z.gradient = self.kern.gradients_X(self.grad_dict['dL_dKuu_c'], self.Z)
        self.Z.gradient += self.kern.gradients_X(self.grad_dict['dL_dKfu_c'].T, self.Z, self.X)

        # gradients wrt the row kernel
        self.kern_row.update_gradients_full(self.grad_dict['dL_dKuu_r'], self.Z_row, None)
        kerngrad = self.kern_row.gradient.copy()
        self.kern_row.update_gradients_expectations(variational_posterior=self.X_row,
                                                    Z=self.Z_row,
                                                    dL_dpsi0=self.grad_dict['dL_dpsi0_r'],
                                                    dL_dpsi1=self.grad_dict['dL_dpsi1_r'],
                                                    dL_dpsi2=self.grad_dict['dL_dpsi2_r'])
        self.kern_row.gradient += kerngrad

        # gradients wrt Z_row
        self.Z_row.gradient = self.kern_row.gradients_X(self.grad_dict['dL_dKuu_r'], self.Z_row)
        self.Z_row.gradient += self.kern_row.gradients_Z_expectations(
            self.grad_dict['dL_dpsi0_r'],
            self.grad_dict['dL_dpsi1_r'],
            self.grad_dict['dL_dpsi2_r'],
            Z=self.Z_row,
            variational_posterior=self.X_row)

        self._log_marginal_likelihood -= self.variational_prior_row.KL_divergence(self.X_row)

        self.X_row.mean.gradient, self.X_row.variance.gradient = self.kern_row.gradients_qX_expectations(
            variational_posterior=self.X_row,
            Z=self.Z_row,
            dL_dpsi0=self.grad_dict['dL_dpsi0_r'],
            dL_dpsi1=self.grad_dict['dL_dpsi1_r'],
            dL_dpsi2=self.grad_dict['dL_dpsi2_r'])

        self.variational_prior_row.update_gradients_KL(self.X_row)

    def optimize_auto(self, max_iters=10000, verbose=True):
        # Stage 1: optimize the variational parameters with the kernels and inducing inputs fixed.
        # ('Zr' and 'Xr' are the parameter names of self.Z_row and self.X_row.)
        self.Z.fix(warning=False)
        self.kern.fix(warning=False)
        self.kern_row.fix(warning=False)
        self.Zr.fix(warning=False)
        self.Xr.fix(warning=False)
        self.optimize(max_iters=int(0.1*max_iters), messages=verbose)
        # Stage 2: free everything and optimize jointly.
        self.unfix()
        self.optimize(max_iters=max_iters, messages=verbose)
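A minimal usage sketch of the missing-data model, following test_multiout_regression_md below (synthetic data; argument names taken from the tests; with the default init='GP' this triggers the SparseGPRegressionMD and BGPLVM initialization above):

# Minimal usage sketch of GPMultioutRegressionMD (synthetic data, not from the diff).
import numpy as np
import GPy

X = np.random.rand(30, 1)                        # 30 scalar observations, flattened over outputs
indexD = np.random.randint(0, 3, size=30)        # each observation tagged with one of 3 outputs
Y = np.random.randn(30, 1)

m = GPy.models.GPMultioutRegressionMD(
    X, Y, indexD, Xr_dim=2,
    kernel_row=GPy.kern.RBF(2, ARD=False),
    num_inducing=(3, 4))                         # (Mc, Mr), as in the tests
m.optimize_auto(max_iters=100)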
GPy/models/sparse_gp_regression_md.py (new file, 64 lines)
@@ -0,0 +1,64 @@
# Copyright (c) 2012, James Hensman
# Licensed under the BSD 3-clause license (see LICENSE.txt)


import numpy as np
from ..core.sparse_gp_mpi import SparseGP_MPI
from .. import likelihoods
from .. import kern
from ..inference.latent_function_inference.vardtc_md import VarDTC_MD
from GPy.core.parameterization.variational import NormalPosterior

class SparseGPRegressionMD(SparseGP_MPI):
    """
    Sparse Gaussian Process Regression with Missing Data
    """

    def __init__(self, X, Y, indexD, kernel=None, Z=None, num_inducing=10, X_variance=None, normalizer=None, mpi_comm=None, individual_Y_noise=False, name='sparse_gp'):

        assert len(Y.shape)==1 or Y.shape[1]==1
        self.individual_Y_noise = individual_Y_noise
        self.indexD = indexD
        output_dim = int(np.max(indexD))+1

        num_data, input_dim = X.shape

        # kern defaults to rbf (plus white for stability)
        if kernel is None:
            kernel = kern.RBF(input_dim)  # + kern.white(input_dim, variance=1e-3)

        # Z defaults to a subset of the data
        if Z is None:
            i = np.random.permutation(num_data)[:min(num_inducing, num_data)]
            Z = X.view(np.ndarray)[i].copy()
        else:
            assert Z.shape[1] == input_dim

        if individual_Y_noise:
            likelihood = likelihoods.Gaussian(variance=np.array([np.var(Y[indexD==d]) for d in range(output_dim)])*0.01)
        else:
            likelihood = likelihoods.Gaussian(variance=np.var(Y)*0.01)

        if X_variance is not None:
            X = NormalPosterior(X, X_variance)

        infr = VarDTC_MD()

        SparseGP_MPI.__init__(self, X, Y, Z, kernel, likelihood, inference_method=infr, normalizer=normalizer, mpi_comm=mpi_comm, name=name)
        self.output_dim = output_dim

    def parameters_changed(self):

        self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.Z, self.likelihood, self.Y, self.indexD, self.output_dim, self.Y_metadata)

        self.likelihood.update_gradients(self.grad_dict['dL_dthetaL'] if self.individual_Y_noise else self.grad_dict['dL_dthetaL'].sum())

        self.kern.update_gradients_diag(self.grad_dict['dL_dKdiag'], self.X)
        kerngrad = self.kern.gradient.copy()
        self.kern.update_gradients_full(self.grad_dict['dL_dKnm'], self.X, self.Z)
        kerngrad += self.kern.gradient
        self.kern.update_gradients_full(self.grad_dict['dL_dKmm'], self.Z, None)
        self.kern.gradient += kerngrad
        # gradients wrt Z
        self.Z.gradient = self.kern.gradients_X(self.grad_dict['dL_dKmm'], self.Z)
        self.Z.gradient += self.kern.gradients_X(self.grad_dict['dL_dKnm'].T, self.Z, self.X)
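For reference, this model is consumed by the init='GP' path of GPMultioutRegressionMD above: a sparse GP over all observations, with the per-output indexing handled by VarDTC_MD. A short sketch with synthetic data, using the module path from this diff:

# Usage sketch of SparseGPRegressionMD (synthetic data, not from the diff).
import numpy as np
import GPy
from GPy.models.sparse_gp_regression_md import SparseGPRegressionMD

X = np.random.rand(40, 1)
indexD = np.random.randint(0, 4, size=40)        # output index per observation
Y = np.random.randn(40, 1)

m_sgp = SparseGPRegressionMD(X, Y, indexD, num_inducing=10)
m_sgp.likelihood.variance[:] = Y.var()*0.01      # same heuristic as the init='GP' path above
m_sgp.optimize(max_iters=100)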
@@ -961,6 +961,127 @@ class GradientTests(np.testing.TestCase):
         m.randomize()
         self.assertTrue(m.checkgrad())
+
+    def test_multiout_regression(self):
+        np.random.seed(0)
+        import GPy
+
+        N = 10
+        N_train = 5
+        D = 4
+        noise_var = .3
+
+        k = GPy.kern.RBF(1,lengthscale=0.1)
+        x = np.random.rand(N,1)
+        cov = k.K(x)
+
+        k_r = GPy.kern.RBF(2,lengthscale=.4)
+        x_r = np.random.rand(D,2)
+        cov_r = k_r.K(x_r)
+
+        cov_all = np.kron(cov_r,cov)
+        L = GPy.util.linalg.jitchol(cov_all)
+
+        y_latent = L.dot(np.random.randn(N*D)).reshape(D,N).T
+
+        x_test = x[N_train:]
+        y_test = y_latent[N_train:]
+        x = x[:N_train]
+        y = y_latent[:N_train]+np.random.randn(N_train,D)*np.sqrt(noise_var)
+
+        Mr = D
+        Mc = x.shape[0]
+        Qr = 5
+        Qc = x.shape[1]
+
+        m_mr = GPy.models.GPMultioutRegression(x,y,Xr_dim=Qr, kernel_row=GPy.kern.RBF(Qr,ARD=True), num_inducing=(Mc,Mr),init='GP')
+        m_mr.optimize_auto(max_iters=1)
+        m_mr.randomize()
+        self.assertTrue(m_mr.checkgrad())
+
+        m_mr = GPy.models.GPMultioutRegression(x,y,Xr_dim=Qr, kernel_row=GPy.kern.RBF(Qr,ARD=True), num_inducing=(Mc,Mr),init='rand')
+        m_mr.optimize_auto(max_iters=1)
+        m_mr.randomize()
+        self.assertTrue(m_mr.checkgrad())
+
+    def test_multiout_regression_md(self):
+        import GPy
+        np.random.seed(0)
+
+        N = 20
+        N_train = 5
+        D = 8
+        noise_var = 0.3
+
+        k = GPy.kern.RBF(1,lengthscale=0.1)
+        x_raw = np.random.rand(N*D,1)
+
+        # dimension assignment
+        D_list = []
+        for i in range(2):
+            while True:
+                D_sub_list = []
+                ratios = []
+                r_p = 0.
+                for j in range(3):
+                    ratios.append(np.random.rand()*(1-r_p)+r_p)
+                    D_sub_list.append(int((ratios[-1]-r_p)*4*N_train))
+                    r_p = ratios[-1]
+                D_sub_list.append(4*N_train - np.sum(D_sub_list))
+                if (np.array(D_sub_list)!=0).all():
+                    D_list.extend([a+N-N_train for a in D_sub_list])
+                    break
+
+        cov = k.K(x_raw)
+
+        k_r = GPy.kern.RBF(2,lengthscale=.4)
+        x_r = np.random.rand(D,2)
+        cov_r = k_r.K(x_r)
+
+        cov_all = np.repeat(np.repeat(cov_r,D_list,axis=0),D_list,axis=1)*cov
+        L = GPy.util.linalg.jitchol(cov_all)
+
+        y_latent = L.dot(np.random.randn(N*D))
+
+        x = np.zeros((D*N_train,))
+        y = np.zeros((D*N_train,))
+        x_test = np.zeros((D*(N-N_train),))
+        y_test = np.zeros((D*(N-N_train),))
+        indexD = np.zeros((D*N_train),dtype=int)
+        indexD_test = np.zeros((D*(N-N_train)),dtype=int)
+
+        offset_all = 0
+        offset_train = 0
+        offset_test = 0
+        for i in range(D):
+            D_test = N-N_train
+            D_train = D_list[i] - N+N_train
+            y[offset_train:offset_train+D_train] = y_latent[offset_all:offset_all+D_train]
+            x[offset_train:offset_train+D_train] = x_raw[offset_all:offset_all+D_train,0]
+            y_test[offset_test:offset_test+D_test] = y_latent[offset_all+D_train:offset_all+D_train+D_test]
+            x_test[offset_test:offset_test+D_test] = x_raw[offset_all+D_train:offset_all+D_train+D_test,0]
+            indexD[offset_train:offset_train+D_train] = i
+            indexD_test[offset_test:offset_test+D_test] = i
+            offset_train += D_train
+            offset_test += D_test
+            offset_all += D_train+D_test
+
+        y_noisefree = y.copy()
+        y += np.random.randn(*y.shape)*np.sqrt(noise_var)
+        x_flat = x.flatten()[:,None]
+        y_flat = y.flatten()[:,None]
+
+        Mr, Mc, Qr, Qc = 4,3,2,1
+
+        m = GPy.models.GPMultioutRegressionMD(x_flat,y_flat,indexD,Xr_dim=Qr, kernel_row=GPy.kern.RBF(Qr,ARD=False), num_inducing=(Mc,Mr))
+        m.optimize_auto(max_iters=1)
+        m.randomize()
+        self.assertTrue(m.checkgrad())
+
+        m = GPy.models.GPMultioutRegressionMD(x_flat,y_flat,indexD,Xr_dim=Qr, kernel_row=GPy.kern.RBF(Qr,ARD=False), num_inducing=(Mc,Mr),init='rand')
+        m.optimize_auto(max_iters=1)
+        m.randomize()
+        self.assertTrue(m.checkgrad())
+
 if __name__ == "__main__":
     print("Running unit tests, please be (very) patient...")
     unittest.main()