mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-05-10 12:32:40 +02:00
restructure rbf kernel
This commit is contained in:
parent
04ab93a961
commit
b945c20004
5 changed files with 372 additions and 620 deletions
|
|
@ -2,6 +2,10 @@
|
||||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||||
|
|
||||||
from ....core.parameterization.parameter_core import Pickleable
|
from ....core.parameterization.parameter_core import Pickleable
|
||||||
|
from GPy.util.caching import Cache_this
|
||||||
|
from ....core.parameterization import variational
|
||||||
|
import rbf_psi_comp
|
||||||
|
import ssrbf_psi_comp
|
||||||
|
|
||||||
class PSICOMP(Pickleable):
|
class PSICOMP(Pickleable):
|
||||||
|
|
||||||
|
|
@ -17,3 +21,22 @@ class PSICOMP(Pickleable):
|
||||||
"""
|
"""
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
class PSICOMP_RBF(Pickleable):
|
||||||
|
|
||||||
|
@Cache_this(limit=1, ignore_args=(0,))
|
||||||
|
def psicomputations(self, variance, lengthscale, Z, variational_posterior):
|
||||||
|
if isinstance(variational_posterior, variational.NormalPosterior):
|
||||||
|
return rbf_psi_comp.psicomputations(variance, lengthscale, Z, variational_posterior)
|
||||||
|
elif isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
|
||||||
|
return ssrbf_psi_comp.psicomputations(variance, lengthscale, Z, variational_posterior)
|
||||||
|
else:
|
||||||
|
raise ValueError, "unknown distriubtion received for psi-statistics"
|
||||||
|
|
||||||
|
@Cache_this(limit=1, ignore_args=(0,1,2,3))
|
||||||
|
def psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior):
|
||||||
|
if isinstance(variational_posterior, variational.NormalPosterior):
|
||||||
|
return rbf_psi_comp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior)
|
||||||
|
elif isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
|
||||||
|
return ssrbf_psi_comp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior)
|
||||||
|
else:
|
||||||
|
raise ValueError, "unknown distriubtion received for psi-statistics"
|
||||||
|
|
@ -3,168 +3,160 @@ The module for psi-statistics for RBF kernel
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from . import PSICOMP
|
from GPy.util.caching import Cacher
|
||||||
from GPy.util.caching import Cache_this
|
|
||||||
from ....util.misc import param_to_array
|
|
||||||
from scipy import weave
|
|
||||||
from ....util.config import *
|
|
||||||
|
|
||||||
class PSICOMP_RBF(PSICOMP):
|
def psicomputations(variance, lengthscale, Z, variational_posterior):
|
||||||
|
"""
|
||||||
|
Z - MxQ
|
||||||
|
mu - NxQ
|
||||||
|
S - NxQ
|
||||||
|
gamma - NxQ
|
||||||
|
"""
|
||||||
|
# here are the "statistics" for psi0, psi1 and psi2
|
||||||
|
# Produced intermediate results:
|
||||||
|
# _psi1 NxM
|
||||||
|
mu = variational_posterior.mean
|
||||||
|
S = variational_posterior.variance
|
||||||
|
|
||||||
@Cache_this(limit=1, ignore_args=(0,))
|
psi0 = np.empty(mu.shape[0])
|
||||||
def psicomputations(self, variance, lengthscale, Z, variational_posterior):
|
psi0[:] = variance
|
||||||
"""
|
psi1 = _psi1computations(variance, lengthscale, Z, mu, S)
|
||||||
Z - MxQ
|
psi2 = _psi2computations(variance, lengthscale, Z, mu, S).sum(axis=0)
|
||||||
mu - NxQ
|
return psi0, psi1, psi2
|
||||||
S - NxQ
|
|
||||||
gamma - NxQ
|
|
||||||
"""
|
|
||||||
# here are the "statistics" for psi0, psi1 and psi2
|
|
||||||
# Produced intermediate results:
|
|
||||||
# _psi1 NxM
|
|
||||||
mu = variational_posterior.mean
|
|
||||||
S = variational_posterior.variance
|
|
||||||
|
|
||||||
psi0 = np.empty(mu.shape[0])
|
|
||||||
psi0[:] = variance
|
|
||||||
psi1 = self._psi1computations(variance, lengthscale, Z, mu, S)
|
|
||||||
psi2 = self._psi2computations(variance, lengthscale, Z, mu, S).sum(axis=0)
|
|
||||||
return psi0, psi1, psi2
|
|
||||||
|
|
||||||
@Cache_this(limit=1, ignore_args=(0,))
|
|
||||||
def _psi1computations(self, variance, lengthscale, Z, mu, S):
|
|
||||||
"""
|
|
||||||
Z - MxQ
|
|
||||||
mu - NxQ
|
|
||||||
S - NxQ
|
|
||||||
gamma - NxQ
|
|
||||||
"""
|
|
||||||
# here are the "statistics" for psi1
|
|
||||||
# Produced intermediate results:
|
|
||||||
# _psi1 NxM
|
|
||||||
|
|
||||||
lengthscale2 = np.square(lengthscale)
|
|
||||||
|
|
||||||
# psi1
|
|
||||||
_psi1_logdenom = np.log(S/lengthscale2+1.).sum(axis=-1) # N
|
|
||||||
_psi1_log = (_psi1_logdenom[:,None]+np.einsum('nmq,nq->nm',np.square(mu[:,None,:]-Z[None,:,:]),1./(S+lengthscale2)))/(-2.)
|
|
||||||
_psi1 = variance*np.exp(_psi1_log)
|
|
||||||
|
|
||||||
return _psi1
|
|
||||||
|
|
||||||
@Cache_this(limit=1, ignore_args=(0,))
|
|
||||||
def _psi2computations(self, variance, lengthscale, Z, mu, S):
|
|
||||||
"""
|
|
||||||
Z - MxQ
|
|
||||||
mu - NxQ
|
|
||||||
S - NxQ
|
|
||||||
gamma - NxQ
|
|
||||||
"""
|
|
||||||
# here are the "statistics" for psi2
|
|
||||||
# Produced intermediate results:
|
|
||||||
# _psi2 MxM
|
|
||||||
|
|
||||||
lengthscale2 = np.square(lengthscale)
|
|
||||||
|
|
||||||
_psi2_logdenom = np.log(2.*S/lengthscale2+1.).sum(axis=-1)/(-2.) # N
|
|
||||||
_psi2_exp1 = (np.square(Z[:,None,:]-Z[None,:,:])/lengthscale2).sum(axis=-1)/(-4.) #MxM
|
|
||||||
Z_hat = (Z[:,None,:]+Z[None,:,:])/2. #MxMxQ
|
|
||||||
denom = 1./(2.*S+lengthscale2)
|
|
||||||
_psi2_exp2 = -(np.square(mu)*denom).sum(axis=-1)[:,None,None]+2.*np.einsum('nq,moq,nq->nmo',mu,Z_hat,denom)-np.einsum('moq,nq->nmo',np.square(Z_hat),denom)
|
|
||||||
_psi2 = variance*variance*np.exp(_psi2_logdenom[:,None,None]+_psi2_exp1[None,:,:]+_psi2_exp2)
|
|
||||||
|
|
||||||
|
|
||||||
return _psi2
|
|
||||||
|
|
||||||
@Cache_this(limit=1, ignore_args=(0,1,2,3))
|
|
||||||
def psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior):
|
|
||||||
ARD = (len(lengthscale)!=1)
|
|
||||||
|
|
||||||
dvar_psi1, dl_psi1, dZ_psi1, dmu_psi1, dS_psi1 = self._psi1compDer(dL_dpsi1, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance)
|
|
||||||
dvar_psi2, dl_psi2, dZ_psi2, dmu_psi2, dS_psi2 = self._psi2compDer(dL_dpsi2, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance)
|
|
||||||
|
|
||||||
dL_dvar = np.sum(dL_dpsi0) + dvar_psi1 + dvar_psi2
|
|
||||||
|
|
||||||
dL_dlengscale = dl_psi1 + dl_psi2
|
|
||||||
if not ARD:
|
|
||||||
dL_dlengscale = dL_dlengscale.sum()
|
|
||||||
|
|
||||||
dL_dmu = dmu_psi1 + dmu_psi2
|
def __psi1computations(variance, lengthscale, Z, mu, S):
|
||||||
dL_dS = dS_psi1 + dS_psi2
|
"""
|
||||||
dL_dZ = dZ_psi1 + dZ_psi2
|
Z - MxQ
|
||||||
|
mu - NxQ
|
||||||
return dL_dvar, dL_dlengscale, dL_dZ, dL_dmu, dL_dS
|
S - NxQ
|
||||||
|
gamma - NxQ
|
||||||
def _psi1compDer(self, dL_dpsi1, variance, lengthscale, Z, mu, S):
|
"""
|
||||||
"""
|
# here are the "statistics" for psi1
|
||||||
dL_dpsi1 - NxM
|
# Produced intermediate results:
|
||||||
Z - MxQ
|
# _psi1 NxM
|
||||||
mu - NxQ
|
|
||||||
S - NxQ
|
|
||||||
gamma - NxQ
|
|
||||||
"""
|
|
||||||
# here are the "statistics" for psi1
|
|
||||||
# Produced intermediate results: dL_dparams w.r.t. psi1
|
|
||||||
# _dL_dvariance 1
|
|
||||||
# _dL_dlengthscale Q
|
|
||||||
# _dL_dZ MxQ
|
|
||||||
# _dL_dgamma NxQ
|
|
||||||
# _dL_dmu NxQ
|
|
||||||
# _dL_dS NxQ
|
|
||||||
|
|
||||||
lengthscale2 = np.square(lengthscale)
|
|
||||||
|
|
||||||
_psi1 = self._psi1computations(variance, lengthscale, Z, mu, S)
|
|
||||||
Lpsi1 = dL_dpsi1*_psi1
|
|
||||||
Zmu = Z[None,:,:]-mu[:,None,:] # NxMxQ
|
|
||||||
denom = 1./(S+lengthscale2)
|
|
||||||
Zmu2_denom = np.square(Zmu)*denom[:,None,:] #NxMxQ
|
|
||||||
_dL_dvar = Lpsi1.sum()/variance
|
|
||||||
_dL_dmu = np.einsum('nm,nmq,nq->nq',Lpsi1,Zmu,denom)
|
|
||||||
_dL_dS = np.einsum('nm,nmq,nq->nq',Lpsi1,(Zmu2_denom-1.),denom)/2.
|
|
||||||
_dL_dZ = -np.einsum('nm,nmq,nq->mq',Lpsi1,Zmu,denom)
|
|
||||||
_dL_dl = np.einsum('nm,nmq,nq->q',Lpsi1,(Zmu2_denom+(S/lengthscale2)[:,None,:]),denom*lengthscale)
|
|
||||||
|
|
||||||
return _dL_dvar, _dL_dl, _dL_dZ, _dL_dmu, _dL_dS
|
|
||||||
|
|
||||||
def _psi2compDer(self, dL_dpsi2, variance, lengthscale, Z, mu, S):
|
|
||||||
"""
|
|
||||||
Z - MxQ
|
|
||||||
mu - NxQ
|
|
||||||
S - NxQ
|
|
||||||
gamma - NxQ
|
|
||||||
dL_dpsi2 - MxM
|
|
||||||
"""
|
|
||||||
# here are the "statistics" for psi2
|
|
||||||
# Produced the derivatives w.r.t. psi2:
|
|
||||||
# _dL_dvariance 1
|
|
||||||
# _dL_dlengthscale Q
|
|
||||||
# _dL_dZ MxQ
|
|
||||||
# _dL_dgamma NxQ
|
|
||||||
# _dL_dmu NxQ
|
|
||||||
# _dL_dS NxQ
|
|
||||||
|
|
||||||
lengthscale2 = np.square(lengthscale)
|
|
||||||
denom = 1./(2*S+lengthscale2)
|
|
||||||
denom2 = np.square(denom)
|
|
||||||
|
|
||||||
_psi2 = self._psi2computations(variance, lengthscale, Z, mu, S) # NxMxM
|
lengthscale2 = np.square(lengthscale)
|
||||||
|
|
||||||
|
# psi1
|
||||||
|
_psi1_logdenom = np.log(S/lengthscale2+1.).sum(axis=-1) # N
|
||||||
|
_psi1_log = (_psi1_logdenom[:,None]+np.einsum('nmq,nq->nm',np.square(mu[:,None,:]-Z[None,:,:]),1./(S+lengthscale2)))/(-2.)
|
||||||
|
_psi1 = variance*np.exp(_psi1_log)
|
||||||
|
|
||||||
Lpsi2 = dL_dpsi2[None,:,:]*_psi2
|
return _psi1
|
||||||
Lpsi2sum = np.einsum('nmo->n',Lpsi2) #N
|
|
||||||
Lpsi2Z = np.einsum('nmo,oq->nq',Lpsi2,Z) #NxQ
|
def __psi2computations(variance, lengthscale, Z, mu, S):
|
||||||
Lpsi2Z2 = np.einsum('nmo,oq,oq->nq',Lpsi2,Z,Z) #NxQ
|
"""
|
||||||
Lpsi2Z2p = np.einsum('nmo,mq,oq->nq',Lpsi2,Z,Z) #NxQ
|
Z - MxQ
|
||||||
Lpsi2Zhat = Lpsi2Z
|
mu - NxQ
|
||||||
Lpsi2Zhat2 = (Lpsi2Z2+Lpsi2Z2p)/2
|
S - NxQ
|
||||||
|
gamma - NxQ
|
||||||
_dL_dvar = Lpsi2sum.sum()*2/variance
|
"""
|
||||||
_dL_dmu = (-2*denom) * (mu*Lpsi2sum[:,None]-Lpsi2Zhat)
|
# here are the "statistics" for psi2
|
||||||
_dL_dS = (2*np.square(denom))*(np.square(mu)*Lpsi2sum[:,None]-2*mu*Lpsi2Zhat+Lpsi2Zhat2) - denom*Lpsi2sum[:,None]
|
# Produced intermediate results:
|
||||||
_dL_dZ = -np.einsum('nmo,oq->oq',Lpsi2,Z)/lengthscale2+np.einsum('nmo,oq->mq',Lpsi2,Z)/lengthscale2+ \
|
# _psi2 MxM
|
||||||
2*np.einsum('nmo,nq,nq->mq',Lpsi2,mu,denom) - np.einsum('nmo,nq,mq->mq',Lpsi2,denom,Z) - np.einsum('nmo,oq,nq->mq',Lpsi2,Z,denom)
|
|
||||||
_dL_dl = 2*lengthscale* ((S/lengthscale2*denom+np.square(mu*denom))*Lpsi2sum[:,None]+(Lpsi2Z2-Lpsi2Z2p)/(2*np.square(lengthscale2))-
|
|
||||||
(2*mu*denom2)*Lpsi2Zhat+denom2*Lpsi2Zhat2).sum(axis=0)
|
|
||||||
|
|
||||||
return _dL_dvar, _dL_dl, _dL_dZ, _dL_dmu, _dL_dS
|
lengthscale2 = np.square(lengthscale)
|
||||||
|
|
||||||
|
_psi2_logdenom = np.log(2.*S/lengthscale2+1.).sum(axis=-1)/(-2.) # N
|
||||||
|
_psi2_exp1 = (np.square(Z[:,None,:]-Z[None,:,:])/lengthscale2).sum(axis=-1)/(-4.) #MxM
|
||||||
|
Z_hat = (Z[:,None,:]+Z[None,:,:])/2. #MxMxQ
|
||||||
|
denom = 1./(2.*S+lengthscale2)
|
||||||
|
_psi2_exp2 = -(np.square(mu)*denom).sum(axis=-1)[:,None,None]+2.*np.einsum('nq,moq,nq->nmo',mu,Z_hat,denom)-np.einsum('moq,nq->nmo',np.square(Z_hat),denom)
|
||||||
|
_psi2 = variance*variance*np.exp(_psi2_logdenom[:,None,None]+_psi2_exp1[None,:,:]+_psi2_exp2)
|
||||||
|
|
||||||
|
|
||||||
|
return _psi2
|
||||||
|
|
||||||
|
def psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior):
|
||||||
|
ARD = (len(lengthscale)!=1)
|
||||||
|
|
||||||
|
dvar_psi1, dl_psi1, dZ_psi1, dmu_psi1, dS_psi1 = _psi1compDer(dL_dpsi1, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance)
|
||||||
|
dvar_psi2, dl_psi2, dZ_psi2, dmu_psi2, dS_psi2 = _psi2compDer(dL_dpsi2, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance)
|
||||||
|
|
||||||
|
dL_dvar = np.sum(dL_dpsi0) + dvar_psi1 + dvar_psi2
|
||||||
|
|
||||||
|
dL_dlengscale = dl_psi1 + dl_psi2
|
||||||
|
if not ARD:
|
||||||
|
dL_dlengscale = dL_dlengscale.sum()
|
||||||
|
|
||||||
|
dL_dmu = dmu_psi1 + dmu_psi2
|
||||||
|
dL_dS = dS_psi1 + dS_psi2
|
||||||
|
dL_dZ = dZ_psi1 + dZ_psi2
|
||||||
|
|
||||||
|
return dL_dvar, dL_dlengscale, dL_dZ, dL_dmu, dL_dS
|
||||||
|
|
||||||
|
def _psi1compDer(dL_dpsi1, variance, lengthscale, Z, mu, S):
|
||||||
|
"""
|
||||||
|
dL_dpsi1 - NxM
|
||||||
|
Z - MxQ
|
||||||
|
mu - NxQ
|
||||||
|
S - NxQ
|
||||||
|
gamma - NxQ
|
||||||
|
"""
|
||||||
|
# here are the "statistics" for psi1
|
||||||
|
# Produced intermediate results: dL_dparams w.r.t. psi1
|
||||||
|
# _dL_dvariance 1
|
||||||
|
# _dL_dlengthscale Q
|
||||||
|
# _dL_dZ MxQ
|
||||||
|
# _dL_dgamma NxQ
|
||||||
|
# _dL_dmu NxQ
|
||||||
|
# _dL_dS NxQ
|
||||||
|
|
||||||
|
lengthscale2 = np.square(lengthscale)
|
||||||
|
|
||||||
|
_psi1 = _psi1computations(variance, lengthscale, Z, mu, S)
|
||||||
|
Lpsi1 = dL_dpsi1*_psi1
|
||||||
|
Zmu = Z[None,:,:]-mu[:,None,:] # NxMxQ
|
||||||
|
denom = 1./(S+lengthscale2)
|
||||||
|
Zmu2_denom = np.square(Zmu)*denom[:,None,:] #NxMxQ
|
||||||
|
_dL_dvar = Lpsi1.sum()/variance
|
||||||
|
_dL_dmu = np.einsum('nm,nmq,nq->nq',Lpsi1,Zmu,denom)
|
||||||
|
_dL_dS = np.einsum('nm,nmq,nq->nq',Lpsi1,(Zmu2_denom-1.),denom)/2.
|
||||||
|
_dL_dZ = -np.einsum('nm,nmq,nq->mq',Lpsi1,Zmu,denom)
|
||||||
|
_dL_dl = np.einsum('nm,nmq,nq->q',Lpsi1,(Zmu2_denom+(S/lengthscale2)[:,None,:]),denom*lengthscale)
|
||||||
|
|
||||||
|
return _dL_dvar, _dL_dl, _dL_dZ, _dL_dmu, _dL_dS
|
||||||
|
|
||||||
|
def _psi2compDer(dL_dpsi2, variance, lengthscale, Z, mu, S):
|
||||||
|
"""
|
||||||
|
Z - MxQ
|
||||||
|
mu - NxQ
|
||||||
|
S - NxQ
|
||||||
|
gamma - NxQ
|
||||||
|
dL_dpsi2 - MxM
|
||||||
|
"""
|
||||||
|
# here are the "statistics" for psi2
|
||||||
|
# Produced the derivatives w.r.t. psi2:
|
||||||
|
# _dL_dvariance 1
|
||||||
|
# _dL_dlengthscale Q
|
||||||
|
# _dL_dZ MxQ
|
||||||
|
# _dL_dgamma NxQ
|
||||||
|
# _dL_dmu NxQ
|
||||||
|
# _dL_dS NxQ
|
||||||
|
|
||||||
|
lengthscale2 = np.square(lengthscale)
|
||||||
|
denom = 1./(2*S+lengthscale2)
|
||||||
|
denom2 = np.square(denom)
|
||||||
|
|
||||||
|
_psi2 = _psi2computations(variance, lengthscale, Z, mu, S) # NxMxM
|
||||||
|
|
||||||
|
Lpsi2 = dL_dpsi2[None,:,:]*_psi2
|
||||||
|
Lpsi2sum = np.einsum('nmo->n',Lpsi2) #N
|
||||||
|
Lpsi2Z = np.einsum('nmo,oq->nq',Lpsi2,Z) #NxQ
|
||||||
|
Lpsi2Z2 = np.einsum('nmo,oq,oq->nq',Lpsi2,Z,Z) #NxQ
|
||||||
|
Lpsi2Z2p = np.einsum('nmo,mq,oq->nq',Lpsi2,Z,Z) #NxQ
|
||||||
|
Lpsi2Zhat = Lpsi2Z
|
||||||
|
Lpsi2Zhat2 = (Lpsi2Z2+Lpsi2Z2p)/2
|
||||||
|
|
||||||
|
_dL_dvar = Lpsi2sum.sum()*2/variance
|
||||||
|
_dL_dmu = (-2*denom) * (mu*Lpsi2sum[:,None]-Lpsi2Zhat)
|
||||||
|
_dL_dS = (2*np.square(denom))*(np.square(mu)*Lpsi2sum[:,None]-2*mu*Lpsi2Zhat+Lpsi2Zhat2) - denom*Lpsi2sum[:,None]
|
||||||
|
_dL_dZ = -np.einsum('nmo,oq->oq',Lpsi2,Z)/lengthscale2+np.einsum('nmo,oq->mq',Lpsi2,Z)/lengthscale2+ \
|
||||||
|
2*np.einsum('nmo,nq,nq->mq',Lpsi2,mu,denom) - np.einsum('nmo,nq,mq->mq',Lpsi2,denom,Z) - np.einsum('nmo,oq,nq->mq',Lpsi2,Z,denom)
|
||||||
|
_dL_dl = 2*lengthscale* ((S/lengthscale2*denom+np.square(mu*denom))*Lpsi2sum[:,None]+(Lpsi2Z2-Lpsi2Z2p)/(2*np.square(lengthscale2))-
|
||||||
|
(2*mu*denom2)*Lpsi2Zhat+denom2*Lpsi2Zhat2).sum(axis=0)
|
||||||
|
|
||||||
|
return _dL_dvar, _dL_dl, _dL_dZ, _dL_dmu, _dL_dS
|
||||||
|
|
||||||
|
_psi1computations = Cacher(__psi1computations, limit=1)
|
||||||
|
_psi2computations = Cacher(__psi2computations, limit=1)
|
||||||
|
|
|
||||||
|
|
@ -6,200 +6,194 @@ The package for the psi statistics computation
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from . import PSICOMP
|
|
||||||
from GPy.util.caching import Cache_this
|
|
||||||
|
|
||||||
class PSICOMP_SSRBF(PSICOMP):
|
def psicomputations(variance, lengthscale, Z, variational_posterior):
|
||||||
|
"""
|
||||||
|
Z - MxQ
|
||||||
|
mu - NxQ
|
||||||
|
S - NxQ
|
||||||
|
gamma - NxQ
|
||||||
|
"""
|
||||||
|
# here are the "statistics" for psi0, psi1 and psi2
|
||||||
|
# Produced intermediate results:
|
||||||
|
# _psi1 NxM
|
||||||
|
mu = variational_posterior.mean
|
||||||
|
S = variational_posterior.variance
|
||||||
|
gamma = variational_posterior.binary_prob
|
||||||
|
|
||||||
@Cache_this(limit=1, ignore_args=(0,))
|
psi0 = np.empty(mu.shape[0])
|
||||||
def psicomputations(self, variance, lengthscale, Z, variational_posterior):
|
psi0[:] = variance
|
||||||
"""
|
psi1 = _psi1computations(variance, lengthscale, Z, mu, S, gamma)
|
||||||
Z - MxQ
|
psi2 = _psi2computations(variance, lengthscale, Z, mu, S, gamma)
|
||||||
mu - NxQ
|
return psi0, psi1, psi2
|
||||||
S - NxQ
|
|
||||||
gamma - NxQ
|
def _psi1computations(variance, lengthscale, Z, mu, S, gamma):
|
||||||
"""
|
"""
|
||||||
# here are the "statistics" for psi0, psi1 and psi2
|
Z - MxQ
|
||||||
# Produced intermediate results:
|
mu - NxQ
|
||||||
# _psi1 NxM
|
S - NxQ
|
||||||
mu = variational_posterior.mean
|
gamma - NxQ
|
||||||
S = variational_posterior.variance
|
"""
|
||||||
gamma = variational_posterior.binary_prob
|
# here are the "statistics" for psi1
|
||||||
|
# Produced intermediate results:
|
||||||
psi0 = np.empty(mu.shape[0])
|
# _psi1 NxM
|
||||||
psi0[:] = variance
|
|
||||||
psi1 = self._psi1computations(variance, lengthscale, Z, mu, S, gamma)
|
lengthscale2 = np.square(lengthscale)
|
||||||
psi2 = self._psi2computations(variance, lengthscale, Z, mu, S, gamma)
|
|
||||||
return psi0, psi1, psi2
|
# psi1
|
||||||
|
_psi1_denom = S[:, None, :] / lengthscale2 + 1. # Nx1xQ
|
||||||
|
_psi1_denom_sqrt = np.sqrt(_psi1_denom) #Nx1xQ
|
||||||
|
_psi1_dist = Z[None, :, :] - mu[:, None, :] # NxMxQ
|
||||||
|
_psi1_dist_sq = np.square(_psi1_dist) / (lengthscale2 * _psi1_denom) # NxMxQ
|
||||||
|
_psi1_common = gamma[:,None,:] / (lengthscale2*_psi1_denom*_psi1_denom_sqrt) #Nx1xQ
|
||||||
|
_psi1_exponent1 = np.log(gamma[:,None,:]) - (_psi1_dist_sq + np.log(_psi1_denom))/2. # NxMxQ
|
||||||
|
_psi1_exponent2 = np.log(1.-gamma[:,None,:]) - (np.square(Z[None,:,:])/lengthscale2)/2. # NxMxQ
|
||||||
|
_psi1_exponent_max = np.maximum(_psi1_exponent1,_psi1_exponent2)
|
||||||
|
_psi1_exponent = _psi1_exponent_max+np.log(np.exp(_psi1_exponent1-_psi1_exponent_max) + np.exp(_psi1_exponent2-_psi1_exponent_max)) #NxMxQ
|
||||||
|
_psi1_exp_sum = _psi1_exponent.sum(axis=-1) #NxM
|
||||||
|
_psi1 = variance * np.exp(_psi1_exp_sum) # NxM
|
||||||
|
|
||||||
|
return _psi1
|
||||||
|
|
||||||
|
def _psi2computations(variance, lengthscale, Z, mu, S, gamma):
|
||||||
|
"""
|
||||||
|
Z - MxQ
|
||||||
|
mu - NxQ
|
||||||
|
S - NxQ
|
||||||
|
gamma - NxQ
|
||||||
|
"""
|
||||||
|
# here are the "statistics" for psi2
|
||||||
|
# Produced intermediate results:
|
||||||
|
# _psi2 MxM
|
||||||
|
|
||||||
def _psi1computations(self, variance, lengthscale, Z, mu, S, gamma):
|
lengthscale2 = np.square(lengthscale)
|
||||||
"""
|
|
||||||
Z - MxQ
|
|
||||||
mu - NxQ
|
|
||||||
S - NxQ
|
|
||||||
gamma - NxQ
|
|
||||||
"""
|
|
||||||
# here are the "statistics" for psi1
|
|
||||||
# Produced intermediate results:
|
|
||||||
# _psi1 NxM
|
|
||||||
|
|
||||||
lengthscale2 = np.square(lengthscale)
|
_psi2_Zhat = 0.5 * (Z[:, None, :] + Z[None, :, :]) # M,M,Q
|
||||||
|
_psi2_Zdist = 0.5 * (Z[:, None, :] - Z[None, :, :]) # M,M,Q
|
||||||
|
_psi2_Zdist_sq = np.square(_psi2_Zdist / lengthscale) # M,M,Q
|
||||||
|
_psi2_Z_sq_sum = (np.square(Z[:,None,:])+np.square(Z[None,:,:]))/lengthscale2 # MxMxQ
|
||||||
|
|
||||||
|
# psi2
|
||||||
|
_psi2_denom = 2.*S[:, None, None, :] / lengthscale2 + 1. # Nx1x1xQ
|
||||||
|
_psi2_denom_sqrt = np.sqrt(_psi2_denom)
|
||||||
|
_psi2_mudist = mu[:,None,None,:]-_psi2_Zhat #N,M,M,Q
|
||||||
|
_psi2_mudist_sq = np.square(_psi2_mudist)/(lengthscale2*_psi2_denom)
|
||||||
|
_psi2_common = gamma[:,None,None,:]/(lengthscale2 * _psi2_denom * _psi2_denom_sqrt) # Nx1x1xQ
|
||||||
|
_psi2_exponent1 = -_psi2_Zdist_sq -_psi2_mudist_sq -0.5*np.log(_psi2_denom)+np.log(gamma[:,None,None,:]) #N,M,M,Q
|
||||||
|
_psi2_exponent2 = np.log(1.-gamma[:,None,None,:]) - 0.5*(_psi2_Z_sq_sum) # NxMxMxQ
|
||||||
|
_psi2_exponent_max = np.maximum(_psi2_exponent1, _psi2_exponent2)
|
||||||
|
_psi2_exponent = _psi2_exponent_max+np.log(np.exp(_psi2_exponent1-_psi2_exponent_max) + np.exp(_psi2_exponent2-_psi2_exponent_max))
|
||||||
|
_psi2_exp_sum = _psi2_exponent.sum(axis=-1) #NxM
|
||||||
|
_psi2 = variance*variance * (np.exp(_psi2_exp_sum).sum(axis=0)) # MxM
|
||||||
|
|
||||||
|
return _psi2
|
||||||
|
|
||||||
|
def psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior):
|
||||||
|
ARD = (len(lengthscale)!=1)
|
||||||
|
|
||||||
# psi1
|
dvar_psi1, dl_psi1, dZ_psi1, dmu_psi1, dS_psi1, dgamma_psi1 = _psi1compDer(dL_dpsi1, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
|
||||||
_psi1_denom = S[:, None, :] / lengthscale2 + 1. # Nx1xQ
|
dvar_psi2, dl_psi2, dZ_psi2, dmu_psi2, dS_psi2, dgamma_psi2 = _psi2compDer(dL_dpsi2, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
|
||||||
_psi1_denom_sqrt = np.sqrt(_psi1_denom) #Nx1xQ
|
|
||||||
_psi1_dist = Z[None, :, :] - mu[:, None, :] # NxMxQ
|
dL_dvar = np.sum(dL_dpsi0) + dvar_psi1 + dvar_psi2
|
||||||
_psi1_dist_sq = np.square(_psi1_dist) / (lengthscale2 * _psi1_denom) # NxMxQ
|
|
||||||
_psi1_common = gamma[:,None,:] / (lengthscale2*_psi1_denom*_psi1_denom_sqrt) #Nx1xQ
|
|
||||||
_psi1_exponent1 = np.log(gamma[:,None,:]) - (_psi1_dist_sq + np.log(_psi1_denom))/2. # NxMxQ
|
|
||||||
_psi1_exponent2 = np.log(1.-gamma[:,None,:]) - (np.square(Z[None,:,:])/lengthscale2)/2. # NxMxQ
|
|
||||||
_psi1_exponent_max = np.maximum(_psi1_exponent1,_psi1_exponent2)
|
|
||||||
_psi1_exponent = _psi1_exponent_max+np.log(np.exp(_psi1_exponent1-_psi1_exponent_max) + np.exp(_psi1_exponent2-_psi1_exponent_max)) #NxMxQ
|
|
||||||
_psi1_exp_sum = _psi1_exponent.sum(axis=-1) #NxM
|
|
||||||
_psi1 = variance * np.exp(_psi1_exp_sum) # NxM
|
|
||||||
|
|
||||||
return _psi1
|
dL_dlengscale = dl_psi1 + dl_psi2
|
||||||
|
if not ARD:
|
||||||
|
dL_dlengscale = dL_dlengscale.sum()
|
||||||
|
|
||||||
|
dL_dgamma = dgamma_psi1 + dgamma_psi2
|
||||||
|
dL_dmu = dmu_psi1 + dmu_psi2
|
||||||
|
dL_dS = dS_psi1 + dS_psi2
|
||||||
|
dL_dZ = dZ_psi1 + dZ_psi2
|
||||||
|
|
||||||
def _psi2computations(self, variance, lengthscale, Z, mu, S, gamma):
|
return dL_dvar, dL_dlengscale, dL_dZ, dL_dmu, dL_dS, dL_dgamma
|
||||||
"""
|
|
||||||
Z - MxQ
|
def _psi1compDer(dL_dpsi1, variance, lengthscale, Z, mu, S, gamma):
|
||||||
mu - NxQ
|
"""
|
||||||
S - NxQ
|
dL_dpsi1 - NxM
|
||||||
gamma - NxQ
|
Z - MxQ
|
||||||
"""
|
mu - NxQ
|
||||||
# here are the "statistics" for psi2
|
S - NxQ
|
||||||
# Produced intermediate results:
|
gamma - NxQ
|
||||||
# _psi2 MxM
|
"""
|
||||||
|
# here are the "statistics" for psi1
|
||||||
lengthscale2 = np.square(lengthscale)
|
# Produced intermediate results: dL_dparams w.r.t. psi1
|
||||||
|
# _dL_dvariance 1
|
||||||
_psi2_Zhat = 0.5 * (Z[:, None, :] + Z[None, :, :]) # M,M,Q
|
# _dL_dlengthscale Q
|
||||||
_psi2_Zdist = 0.5 * (Z[:, None, :] - Z[None, :, :]) # M,M,Q
|
# _dL_dZ MxQ
|
||||||
_psi2_Zdist_sq = np.square(_psi2_Zdist / lengthscale) # M,M,Q
|
# _dL_dgamma NxQ
|
||||||
_psi2_Z_sq_sum = (np.square(Z[:,None,:])+np.square(Z[None,:,:]))/lengthscale2 # MxMxQ
|
# _dL_dmu NxQ
|
||||||
|
# _dL_dS NxQ
|
||||||
|
|
||||||
# psi2
|
lengthscale2 = np.square(lengthscale)
|
||||||
_psi2_denom = 2.*S[:, None, None, :] / lengthscale2 + 1. # Nx1x1xQ
|
|
||||||
_psi2_denom_sqrt = np.sqrt(_psi2_denom)
|
# psi1
|
||||||
_psi2_mudist = mu[:,None,None,:]-_psi2_Zhat #N,M,M,Q
|
_psi1_denom = S / lengthscale2 + 1. # NxQ
|
||||||
_psi2_mudist_sq = np.square(_psi2_mudist)/(lengthscale2*_psi2_denom)
|
_psi1_denom_sqrt = np.sqrt(_psi1_denom) #NxQ
|
||||||
_psi2_common = gamma[:,None,None,:]/(lengthscale2 * _psi2_denom * _psi2_denom_sqrt) # Nx1x1xQ
|
_psi1_dist = Z[None, :, :] - mu[:, None, :] # NxMxQ
|
||||||
_psi2_exponent1 = -_psi2_Zdist_sq -_psi2_mudist_sq -0.5*np.log(_psi2_denom)+np.log(gamma[:,None,None,:]) #N,M,M,Q
|
_psi1_dist_sq = np.square(_psi1_dist) / (lengthscale2 * _psi1_denom[:,None,:]) # NxMxQ
|
||||||
_psi2_exponent2 = np.log(1.-gamma[:,None,None,:]) - 0.5*(_psi2_Z_sq_sum) # NxMxMxQ
|
_psi1_common = gamma / (lengthscale2*_psi1_denom*_psi1_denom_sqrt) #NxQ
|
||||||
_psi2_exponent_max = np.maximum(_psi2_exponent1, _psi2_exponent2)
|
_psi1_exponent1 = np.log(gamma[:,None,:]) -0.5 * (_psi1_dist_sq + np.log(_psi1_denom[:, None,:])) # NxMxQ
|
||||||
_psi2_exponent = _psi2_exponent_max+np.log(np.exp(_psi2_exponent1-_psi2_exponent_max) + np.exp(_psi2_exponent2-_psi2_exponent_max))
|
_psi1_exponent2 = np.log(1.-gamma[:,None,:]) -0.5 * (np.square(Z[None,:,:])/lengthscale2) # NxMxQ
|
||||||
_psi2_exp_sum = _psi2_exponent.sum(axis=-1) #NxM
|
_psi1_exponent_max = np.maximum(_psi1_exponent1,_psi1_exponent2)
|
||||||
_psi2 = variance*variance * (np.exp(_psi2_exp_sum).sum(axis=0)) # MxM
|
_psi1_exponent = _psi1_exponent_max+np.log(np.exp(_psi1_exponent1-_psi1_exponent_max) + np.exp(_psi1_exponent2-_psi1_exponent_max)) #NxMxQ
|
||||||
|
_psi1_exp_sum = _psi1_exponent.sum(axis=-1) #NxM
|
||||||
|
_psi1_exp_dist_sq = np.exp(-0.5*_psi1_dist_sq) # NxMxQ
|
||||||
|
_psi1_exp_Z = np.exp(-0.5*np.square(Z[None,:,:])/lengthscale2) # 1xMxQ
|
||||||
|
_psi1_q = variance * np.exp(_psi1_exp_sum[:,:,None] - _psi1_exponent) # NxMxQ
|
||||||
|
_psi1 = variance * np.exp(_psi1_exp_sum) # NxM
|
||||||
|
_dL_dvariance = np.einsum('nm,nm->',dL_dpsi1, _psi1)/variance # 1
|
||||||
|
_dL_dgamma = np.einsum('nm,nmq,nmq->nq',dL_dpsi1, _psi1_q, (_psi1_exp_dist_sq/_psi1_denom_sqrt[:,None,:]-_psi1_exp_Z)) # NxQ
|
||||||
|
_dL_dmu = np.einsum('nm, nmq, nmq, nmq, nq->nq',dL_dpsi1,_psi1_q,_psi1_exp_dist_sq,_psi1_dist,_psi1_common) # NxQ
|
||||||
|
_dL_dS = np.einsum('nm,nmq,nmq,nq,nmq->nq',dL_dpsi1,_psi1_q,_psi1_exp_dist_sq,_psi1_common,(_psi1_dist_sq-1.))/2. # NxQ
|
||||||
|
_dL_dZ = np.einsum('nm,nmq,nmq->mq',dL_dpsi1,_psi1_q, (- _psi1_common[:,None,:] * _psi1_dist * _psi1_exp_dist_sq - (1-gamma[:,None,:])/lengthscale2*Z[None,:,:]*_psi1_exp_Z))
|
||||||
|
_dL_dlengthscale = lengthscale* np.einsum('nm,nmq,nmq->q',dL_dpsi1,_psi1_q,(_psi1_common[:,None,:]*(S[:,None,:]/lengthscale2+_psi1_dist_sq)*_psi1_exp_dist_sq + (1-gamma[:,None,:])*np.square(Z[None,:,:]/lengthscale2)*_psi1_exp_Z))
|
||||||
|
|
||||||
|
return _dL_dvariance, _dL_dlengthscale, _dL_dZ, _dL_dmu, _dL_dS, _dL_dgamma
|
||||||
|
|
||||||
|
def _psi2compDer(dL_dpsi2, variance, lengthscale, Z, mu, S, gamma):
|
||||||
|
"""
|
||||||
|
Z - MxQ
|
||||||
|
mu - NxQ
|
||||||
|
S - NxQ
|
||||||
|
gamma - NxQ
|
||||||
|
dL_dpsi2 - MxM
|
||||||
|
"""
|
||||||
|
# here are the "statistics" for psi2
|
||||||
|
# Produced the derivatives w.r.t. psi2:
|
||||||
|
# _dL_dvariance 1
|
||||||
|
# _dL_dlengthscale Q
|
||||||
|
# _dL_dZ MxQ
|
||||||
|
# _dL_dgamma NxQ
|
||||||
|
# _dL_dmu NxQ
|
||||||
|
# _dL_dS NxQ
|
||||||
|
|
||||||
return _psi2
|
lengthscale2 = np.square(lengthscale)
|
||||||
|
|
||||||
@Cache_this(limit=1, ignore_args=(0,1,2,3))
|
_psi2_Zhat = 0.5 * (Z[:, None, :] + Z[None, :, :]) # M,M,Q
|
||||||
def psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior):
|
_psi2_Zdist = 0.5 * (Z[:, None, :] - Z[None, :, :]) # M,M,Q
|
||||||
ARD = (len(lengthscale)!=1)
|
_psi2_Zdist_sq = np.square(_psi2_Zdist / lengthscale) # M,M,Q
|
||||||
|
_psi2_Z_sq_sum = (np.square(Z[:,None,:])+np.square(Z[None,:,:]))/lengthscale2 # MxMxQ
|
||||||
dvar_psi1, dl_psi1, dZ_psi1, dmu_psi1, dS_psi1, dgamma_psi1 = self._psi1compDer(dL_dpsi1, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
|
|
||||||
dvar_psi2, dl_psi2, dZ_psi2, dmu_psi2, dS_psi2, dgamma_psi2 = self._psi2compDer(dL_dpsi2, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
|
# psi2
|
||||||
|
_psi2_denom = 2.*S / lengthscale2 + 1. # NxQ
|
||||||
dL_dvar = np.sum(dL_dpsi0) + dvar_psi1 + dvar_psi2
|
_psi2_denom_sqrt = np.sqrt(_psi2_denom)
|
||||||
|
_psi2_mudist = mu[:,None,None,:]-_psi2_Zhat #N,M,M,Q
|
||||||
dL_dlengscale = dl_psi1 + dl_psi2
|
_psi2_mudist_sq = np.square(_psi2_mudist)/(lengthscale2*_psi2_denom[:,None,None,:])
|
||||||
if not ARD:
|
_psi2_common = gamma/(lengthscale2 * _psi2_denom * _psi2_denom_sqrt) # NxQ
|
||||||
dL_dlengscale = dL_dlengscale.sum()
|
_psi2_exponent1 = -_psi2_Zdist_sq -_psi2_mudist_sq -0.5*np.log(_psi2_denom[:,None,None,:])+np.log(gamma[:,None,None,:]) #N,M,M,Q
|
||||||
|
_psi2_exponent2 = np.log(1.-gamma[:,None,None,:]) - 0.5*(_psi2_Z_sq_sum) # NxMxMxQ
|
||||||
dL_dgamma = dgamma_psi1 + dgamma_psi2
|
_psi2_exponent_max = np.maximum(_psi2_exponent1, _psi2_exponent2)
|
||||||
dL_dmu = dmu_psi1 + dmu_psi2
|
_psi2_exponent = _psi2_exponent_max+np.log(np.exp(_psi2_exponent1-_psi2_exponent_max) + np.exp(_psi2_exponent2-_psi2_exponent_max))
|
||||||
dL_dS = dS_psi1 + dS_psi2
|
_psi2_exp_sum = _psi2_exponent.sum(axis=-1) #NxM
|
||||||
dL_dZ = dZ_psi1 + dZ_psi2
|
_psi2_q = variance*variance * np.exp(_psi2_exp_sum[:,:,:,None]-_psi2_exponent) # NxMxMxQ
|
||||||
|
_psi2_exp_dist_sq = np.exp(-_psi2_Zdist_sq -_psi2_mudist_sq) # NxMxMxQ
|
||||||
return dL_dvar, dL_dlengscale, dL_dZ, dL_dmu, dL_dS, dL_dgamma
|
_psi2_exp_Z = np.exp(-0.5*_psi2_Z_sq_sum) # MxMxQ
|
||||||
|
_psi2 = variance*variance * (np.exp(_psi2_exp_sum).sum(axis=0)) # MxM
|
||||||
def _psi1compDer(self, dL_dpsi1, variance, lengthscale, Z, mu, S, gamma):
|
_dL_dvariance = np.einsum('mo,mo->',dL_dpsi2,_psi2)*2./variance
|
||||||
"""
|
_dL_dgamma = np.einsum('mo,nmoq,nmoq->nq',dL_dpsi2,_psi2_q,(_psi2_exp_dist_sq/_psi2_denom_sqrt[:,None,None,:] - _psi2_exp_Z))
|
||||||
dL_dpsi1 - NxM
|
_dL_dmu = -2.*np.einsum('mo,nmoq,nq,nmoq,nmoq->nq',dL_dpsi2,_psi2_q,_psi2_common,_psi2_mudist,_psi2_exp_dist_sq)
|
||||||
Z - MxQ
|
_dL_dS = np.einsum('mo,nmoq,nq,nmoq,nmoq->nq',dL_dpsi2,_psi2_q, _psi2_common, (2.*_psi2_mudist_sq-1.), _psi2_exp_dist_sq)
|
||||||
mu - NxQ
|
_dL_dZ = 2.*np.einsum('mo,nmoq,nmoq->mq',dL_dpsi2,_psi2_q,(_psi2_common[:,None,None,:]*(-_psi2_Zdist*_psi2_denom[:,None,None,:]+_psi2_mudist)*_psi2_exp_dist_sq - (1-gamma[:,None,None,:])*Z[:,None,:]/lengthscale2*_psi2_exp_Z))
|
||||||
S - NxQ
|
_dL_dlengthscale = 2.*lengthscale* np.einsum('mo,nmoq,nmoq->q',dL_dpsi2,_psi2_q,(_psi2_common[:,None,None,:]*(S[:,None,None,:]/lengthscale2+_psi2_Zdist_sq*_psi2_denom[:,None,None,:]+_psi2_mudist_sq)*_psi2_exp_dist_sq+(1-gamma[:,None,None,:])*_psi2_Z_sq_sum*0.5/lengthscale2*_psi2_exp_Z))
|
||||||
gamma - NxQ
|
|
||||||
"""
|
return _dL_dvariance, _dL_dlengthscale, _dL_dZ, _dL_dmu, _dL_dS, _dL_dgamma
|
||||||
# here are the "statistics" for psi1
|
|
||||||
# Produced intermediate results: dL_dparams w.r.t. psi1
|
|
||||||
# _dL_dvariance 1
|
|
||||||
# _dL_dlengthscale Q
|
|
||||||
# _dL_dZ MxQ
|
|
||||||
# _dL_dgamma NxQ
|
|
||||||
# _dL_dmu NxQ
|
|
||||||
# _dL_dS NxQ
|
|
||||||
|
|
||||||
lengthscale2 = np.square(lengthscale)
|
|
||||||
|
|
||||||
# psi1
|
|
||||||
_psi1_denom = S / lengthscale2 + 1. # NxQ
|
|
||||||
_psi1_denom_sqrt = np.sqrt(_psi1_denom) #NxQ
|
|
||||||
_psi1_dist = Z[None, :, :] - mu[:, None, :] # NxMxQ
|
|
||||||
_psi1_dist_sq = np.square(_psi1_dist) / (lengthscale2 * _psi1_denom[:,None,:]) # NxMxQ
|
|
||||||
_psi1_common = gamma / (lengthscale2*_psi1_denom*_psi1_denom_sqrt) #NxQ
|
|
||||||
_psi1_exponent1 = np.log(gamma[:,None,:]) -0.5 * (_psi1_dist_sq + np.log(_psi1_denom[:, None,:])) # NxMxQ
|
|
||||||
_psi1_exponent2 = np.log(1.-gamma[:,None,:]) -0.5 * (np.square(Z[None,:,:])/lengthscale2) # NxMxQ
|
|
||||||
_psi1_exponent_max = np.maximum(_psi1_exponent1,_psi1_exponent2)
|
|
||||||
_psi1_exponent = _psi1_exponent_max+np.log(np.exp(_psi1_exponent1-_psi1_exponent_max) + np.exp(_psi1_exponent2-_psi1_exponent_max)) #NxMxQ
|
|
||||||
_psi1_exp_sum = _psi1_exponent.sum(axis=-1) #NxM
|
|
||||||
_psi1_exp_dist_sq = np.exp(-0.5*_psi1_dist_sq) # NxMxQ
|
|
||||||
_psi1_exp_Z = np.exp(-0.5*np.square(Z[None,:,:])/lengthscale2) # 1xMxQ
|
|
||||||
_psi1_q = variance * np.exp(_psi1_exp_sum[:,:,None] - _psi1_exponent) # NxMxQ
|
|
||||||
_psi1 = variance * np.exp(_psi1_exp_sum) # NxM
|
|
||||||
_dL_dvariance = np.einsum('nm,nm->',dL_dpsi1, _psi1)/variance # 1
|
|
||||||
_dL_dgamma = np.einsum('nm,nmq,nmq->nq',dL_dpsi1, _psi1_q, (_psi1_exp_dist_sq/_psi1_denom_sqrt[:,None,:]-_psi1_exp_Z)) # NxQ
|
|
||||||
_dL_dmu = np.einsum('nm, nmq, nmq, nmq, nq->nq',dL_dpsi1,_psi1_q,_psi1_exp_dist_sq,_psi1_dist,_psi1_common) # NxQ
|
|
||||||
_dL_dS = np.einsum('nm,nmq,nmq,nq,nmq->nq',dL_dpsi1,_psi1_q,_psi1_exp_dist_sq,_psi1_common,(_psi1_dist_sq-1.))/2. # NxQ
|
|
||||||
_dL_dZ = np.einsum('nm,nmq,nmq->mq',dL_dpsi1,_psi1_q, (- _psi1_common[:,None,:] * _psi1_dist * _psi1_exp_dist_sq - (1-gamma[:,None,:])/lengthscale2*Z[None,:,:]*_psi1_exp_Z))
|
|
||||||
_dL_dlengthscale = lengthscale* np.einsum('nm,nmq,nmq->q',dL_dpsi1,_psi1_q,(_psi1_common[:,None,:]*(S[:,None,:]/lengthscale2+_psi1_dist_sq)*_psi1_exp_dist_sq + (1-gamma[:,None,:])*np.square(Z[None,:,:]/lengthscale2)*_psi1_exp_Z))
|
|
||||||
|
|
||||||
return _dL_dvariance, _dL_dlengthscale, _dL_dZ, _dL_dmu, _dL_dS, _dL_dgamma
|
|
||||||
|
|
||||||
def _psi2compDer(self, dL_dpsi2, variance, lengthscale, Z, mu, S, gamma):
|
|
||||||
"""
|
|
||||||
Z - MxQ
|
|
||||||
mu - NxQ
|
|
||||||
S - NxQ
|
|
||||||
gamma - NxQ
|
|
||||||
dL_dpsi2 - MxM
|
|
||||||
"""
|
|
||||||
# here are the "statistics" for psi2
|
|
||||||
# Produced the derivatives w.r.t. psi2:
|
|
||||||
# _dL_dvariance 1
|
|
||||||
# _dL_dlengthscale Q
|
|
||||||
# _dL_dZ MxQ
|
|
||||||
# _dL_dgamma NxQ
|
|
||||||
# _dL_dmu NxQ
|
|
||||||
# _dL_dS NxQ
|
|
||||||
|
|
||||||
lengthscale2 = np.square(lengthscale)
|
|
||||||
|
|
||||||
_psi2_Zhat = 0.5 * (Z[:, None, :] + Z[None, :, :]) # M,M,Q
|
|
||||||
_psi2_Zdist = 0.5 * (Z[:, None, :] - Z[None, :, :]) # M,M,Q
|
|
||||||
_psi2_Zdist_sq = np.square(_psi2_Zdist / lengthscale) # M,M,Q
|
|
||||||
_psi2_Z_sq_sum = (np.square(Z[:,None,:])+np.square(Z[None,:,:]))/lengthscale2 # MxMxQ
|
|
||||||
|
|
||||||
# psi2
|
|
||||||
_psi2_denom = 2.*S / lengthscale2 + 1. # NxQ
|
|
||||||
_psi2_denom_sqrt = np.sqrt(_psi2_denom)
|
|
||||||
_psi2_mudist = mu[:,None,None,:]-_psi2_Zhat #N,M,M,Q
|
|
||||||
_psi2_mudist_sq = np.square(_psi2_mudist)/(lengthscale2*_psi2_denom[:,None,None,:])
|
|
||||||
_psi2_common = gamma/(lengthscale2 * _psi2_denom * _psi2_denom_sqrt) # NxQ
|
|
||||||
_psi2_exponent1 = -_psi2_Zdist_sq -_psi2_mudist_sq -0.5*np.log(_psi2_denom[:,None,None,:])+np.log(gamma[:,None,None,:]) #N,M,M,Q
|
|
||||||
_psi2_exponent2 = np.log(1.-gamma[:,None,None,:]) - 0.5*(_psi2_Z_sq_sum) # NxMxMxQ
|
|
||||||
_psi2_exponent_max = np.maximum(_psi2_exponent1, _psi2_exponent2)
|
|
||||||
_psi2_exponent = _psi2_exponent_max+np.log(np.exp(_psi2_exponent1-_psi2_exponent_max) + np.exp(_psi2_exponent2-_psi2_exponent_max))
|
|
||||||
_psi2_exp_sum = _psi2_exponent.sum(axis=-1) #NxM
|
|
||||||
_psi2_q = variance*variance * np.exp(_psi2_exp_sum[:,:,:,None]-_psi2_exponent) # NxMxMxQ
|
|
||||||
_psi2_exp_dist_sq = np.exp(-_psi2_Zdist_sq -_psi2_mudist_sq) # NxMxMxQ
|
|
||||||
_psi2_exp_Z = np.exp(-0.5*_psi2_Z_sq_sum) # MxMxQ
|
|
||||||
_psi2 = variance*variance * (np.exp(_psi2_exp_sum).sum(axis=0)) # MxM
|
|
||||||
_dL_dvariance = np.einsum('mo,mo->',dL_dpsi2,_psi2)*2./variance
|
|
||||||
_dL_dgamma = np.einsum('mo,nmoq,nmoq->nq',dL_dpsi2,_psi2_q,(_psi2_exp_dist_sq/_psi2_denom_sqrt[:,None,None,:] - _psi2_exp_Z))
|
|
||||||
_dL_dmu = -2.*np.einsum('mo,nmoq,nq,nmoq,nmoq->nq',dL_dpsi2,_psi2_q,_psi2_common,_psi2_mudist,_psi2_exp_dist_sq)
|
|
||||||
_dL_dS = np.einsum('mo,nmoq,nq,nmoq,nmoq->nq',dL_dpsi2,_psi2_q, _psi2_common, (2.*_psi2_mudist_sq-1.), _psi2_exp_dist_sq)
|
|
||||||
_dL_dZ = 2.*np.einsum('mo,nmoq,nmoq->mq',dL_dpsi2,_psi2_q,(_psi2_common[:,None,None,:]*(-_psi2_Zdist*_psi2_denom[:,None,None,:]+_psi2_mudist)*_psi2_exp_dist_sq - (1-gamma[:,None,None,:])*Z[:,None,:]/lengthscale2*_psi2_exp_Z))
|
|
||||||
_dL_dlengthscale = 2.*lengthscale* np.einsum('mo,nmoq,nmoq->q',dL_dpsi2,_psi2_q,(_psi2_common[:,None,None,:]*(S[:,None,None,:]/lengthscale2+_psi2_Zdist_sq*_psi2_denom[:,None,None,:]+_psi2_mudist_sq)*_psi2_exp_dist_sq+(1-gamma[:,None,None,:])*_psi2_Z_sq_sum*0.5/lengthscale2*_psi2_exp_Z))
|
|
||||||
|
|
||||||
return _dL_dvariance, _dL_dlengthscale, _dL_dZ, _dL_dmu, _dL_dS, _dL_dgamma
|
|
||||||
|
|
|
||||||
|
|
@ -8,7 +8,7 @@ from ...util.misc import param_to_array
|
||||||
from stationary import Stationary
|
from stationary import Stationary
|
||||||
from GPy.util.caching import Cache_this
|
from GPy.util.caching import Cache_this
|
||||||
from ...core.parameterization import variational
|
from ...core.parameterization import variational
|
||||||
from psi_comp import ssrbf_psi_comp,ssrbf_psi_gpucomp,rbf_psi_comp
|
from psi_comp import PSICOMP_RBF
|
||||||
from ...util.config import *
|
from ...util.config import *
|
||||||
|
|
||||||
class RBF(Stationary):
|
class RBF(Stationary):
|
||||||
|
|
@ -25,13 +25,7 @@ class RBF(Stationary):
|
||||||
super(RBF, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name, useGPU=useGPU)
|
super(RBF, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name, useGPU=useGPU)
|
||||||
self.weave_options = {}
|
self.weave_options = {}
|
||||||
self.group_spike_prob = False
|
self.group_spike_prob = False
|
||||||
self.psicomp = rbf_psi_comp.PSICOMP_RBF()
|
self.psicomp = PSICOMP_RBF()
|
||||||
|
|
||||||
def set_for_SpikeAndSlab(self):
|
|
||||||
if self.useGPU:
|
|
||||||
self.psicomp = ssrbf_psi_gpucomp.PSICOMP_SSRBF()
|
|
||||||
else:
|
|
||||||
self.psicomp = ssrbf_psi_comp.PSICOMP_SSRBF()
|
|
||||||
|
|
||||||
def K_of_r(self, r):
|
def K_of_r(self, r):
|
||||||
return self.variance * np.exp(-0.5 * r**2)
|
return self.variance * np.exp(-0.5 * r**2)
|
||||||
|
|
@ -44,289 +38,39 @@ class RBF(Stationary):
|
||||||
#---------------------------------------#
|
#---------------------------------------#
|
||||||
|
|
||||||
def psi0(self, Z, variational_posterior):
|
def psi0(self, Z, variational_posterior):
|
||||||
if isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
|
if self.useGPU:
|
||||||
if self.useGPU:
|
return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)[0]
|
||||||
return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)[0]
|
|
||||||
else:
|
|
||||||
return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior)[0]
|
|
||||||
else:
|
else:
|
||||||
# return self.Kdiag(variational_posterior.mean)
|
|
||||||
return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior)[0]
|
return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior)[0]
|
||||||
|
|
||||||
def psi1(self, Z, variational_posterior):
|
def psi1(self, Z, variational_posterior):
|
||||||
if isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
|
if self.useGPU:
|
||||||
if self.useGPU:
|
return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)[1]
|
||||||
return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)[1]
|
|
||||||
else:
|
|
||||||
return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior)[1]
|
|
||||||
else:
|
else:
|
||||||
return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior)[1]
|
return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior)[1]
|
||||||
|
|
||||||
def psi2(self, Z, variational_posterior):
|
def psi2(self, Z, variational_posterior):
|
||||||
if isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
|
if self.useGPU:
|
||||||
if self.useGPU:
|
return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)[2]
|
||||||
return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)[2]
|
|
||||||
else:
|
|
||||||
return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior)[2]
|
|
||||||
else:
|
else:
|
||||||
return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior)[2]
|
return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior)[2]
|
||||||
|
|
||||||
def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
|
def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
|
||||||
# Spike-and-Slab GPLVM
|
if self.useGPU:
|
||||||
if isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
|
self.psicomp.update_gradients_expectations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)
|
||||||
if self.useGPU:
|
else:
|
||||||
self.psicomp.update_gradients_expectations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)
|
dL_dvar, dL_dlengscale = self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)[:2]
|
||||||
else:
|
|
||||||
dL_dvar, dL_dlengscale, _, _, _, _ = self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)
|
|
||||||
self.variance.gradient = dL_dvar
|
|
||||||
self.lengthscale.gradient = dL_dlengscale
|
|
||||||
|
|
||||||
elif isinstance(variational_posterior, variational.NormalPosterior):
|
|
||||||
dL_dvar, dL_dlengscale, _, _, _ = self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)
|
|
||||||
self.variance.gradient = dL_dvar
|
self.variance.gradient = dL_dvar
|
||||||
self.lengthscale.gradient = dL_dlengscale
|
self.lengthscale.gradient = dL_dlengscale
|
||||||
|
|
||||||
# l2 = self.lengthscale**2
|
|
||||||
# if l2.size != self.input_dim:
|
|
||||||
# l2 = l2*np.ones(self.input_dim)
|
|
||||||
# #contributions from psi0:
|
|
||||||
# self.variance.gradient = np.sum(dL_dpsi0)
|
|
||||||
# self.lengthscale.gradient = 0.
|
|
||||||
#
|
|
||||||
# # #from psi1
|
|
||||||
# denom, _, dist_sq, psi1 = self._psi1computations(Z, variational_posterior)
|
|
||||||
# d_length = psi1[:,:,None] * ((dist_sq - 1.)/(self.lengthscale*denom) +1./self.lengthscale)
|
|
||||||
# dpsi1_dlength = d_length * dL_dpsi1[:, :, None]
|
|
||||||
# print dpsi1_dlength.sum(0).sum(0)
|
|
||||||
# if self.ARD:
|
|
||||||
# self.lengthscale.gradient += dpsi1_dlength.sum(0).sum(0)
|
|
||||||
# else:
|
|
||||||
# self.lengthscale.gradient += dpsi1_dlength.sum()
|
|
||||||
# self.variance.gradient += np.sum(dL_dpsi1 * psi1) / self.variance
|
|
||||||
# #from psi2
|
|
||||||
# S = variational_posterior.variance
|
|
||||||
# _, Zdist_sq, _, mudist_sq, psi2 = self._psi2computations(Z, variational_posterior)
|
|
||||||
# if not self.ARD:
|
|
||||||
# self.lengthscale.gradient += self._weave_psi2_lengthscale_grads(dL_dpsi2, psi2, Zdist_sq, S, mudist_sq, l2).sum()
|
|
||||||
# else:
|
|
||||||
# self.lengthscale.gradient += self._weave_psi2_lengthscale_grads(dL_dpsi2, psi2, Zdist_sq, S, mudist_sq, l2)
|
|
||||||
# # print self._weave_psi2_lengthscale_grads(dL_dpsi2, psi2, Zdist_sq, S, mudist_sq, l2)
|
|
||||||
#
|
|
||||||
# self.variance.gradient += 2.*np.sum(dL_dpsi2 * psi2)/self.variance
|
|
||||||
|
|
||||||
else:
|
|
||||||
raise ValueError, "unknown distriubtion received for psi-statistics"
|
|
||||||
|
|
||||||
def gradients_Z_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
|
def gradients_Z_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
|
||||||
# Spike-and-Slab GPLVM
|
if self.useGPU:
|
||||||
if isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
|
return self.psicomp.gradients_Z_expectations(dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)
|
||||||
if self.useGPU:
|
|
||||||
return self.psicomp.gradients_Z_expectations(dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)
|
|
||||||
else:
|
|
||||||
_, _, dL_dZ, _, _, _ = self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)
|
|
||||||
return dL_dZ
|
|
||||||
|
|
||||||
elif isinstance(variational_posterior, variational.NormalPosterior):
|
|
||||||
_, _, dL_dZ, _, _ = self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)
|
|
||||||
return dL_dZ
|
|
||||||
#
|
|
||||||
# l2 = self.lengthscale **2
|
|
||||||
#
|
|
||||||
# #psi1
|
|
||||||
# denom, dist, dist_sq, psi1 = self._psi1computations(Z, variational_posterior)
|
|
||||||
# grad = np.einsum('ij,ij,ijk,ijk->jk', dL_dpsi1, psi1, dist, -1./(denom*l2))
|
|
||||||
#
|
|
||||||
# #psi2
|
|
||||||
# Zdist, Zdist_sq, mudist, mudist_sq, psi2 = self._psi2computations(Z, variational_posterior)
|
|
||||||
# term1 = Zdist / l2 # M, M, Q
|
|
||||||
# S = variational_posterior.variance
|
|
||||||
# term2 = mudist / (2.*S[:,None,None,:] + l2) # N, M, M, Q
|
|
||||||
#
|
|
||||||
# grad += 2.*np.einsum('ijk,ijk,ijkl->kl', dL_dpsi2, psi2, term1[None,:,:,:] + term2)
|
|
||||||
#
|
|
||||||
# return grad
|
|
||||||
else:
|
else:
|
||||||
raise ValueError, "unknown distriubtion received for psi-statistics"
|
return self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)[2]
|
||||||
|
|
||||||
def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
|
def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
|
||||||
# Spike-and-Slab GPLVM
|
if self.useGPU:
|
||||||
if isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
|
return self.psicomp.gradients_qX_expectations(dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)
|
||||||
if self.useGPU:
|
|
||||||
return self.psicomp.gradients_qX_expectations(dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)
|
|
||||||
else:
|
|
||||||
_, _, _, dL_dmu, dL_dS, dL_dgamma = self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)
|
|
||||||
return dL_dmu, dL_dS, dL_dgamma
|
|
||||||
|
|
||||||
elif isinstance(variational_posterior, variational.NormalPosterior):
|
|
||||||
_, _, _, dL_dmu, dL_dS = self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)
|
|
||||||
|
|
||||||
# l2 = self.lengthscale **2
|
|
||||||
# #psi1
|
|
||||||
# denom, dist, dist_sq, psi1 = self._psi1computations(Z, variational_posterior)
|
|
||||||
# tmp = psi1[:, :, None] / l2 / denom
|
|
||||||
# grad_mu = np.sum(dL_dpsi1[:, :, None] * tmp * dist, 1)
|
|
||||||
# grad_S = np.sum(dL_dpsi1[:, :, None] * 0.5 * tmp * (dist_sq - 1), 1)
|
|
||||||
# #psi2
|
|
||||||
# _, _, mudist, mudist_sq, psi2 = self._psi2computations(Z, variational_posterior)
|
|
||||||
# S = variational_posterior.variance
|
|
||||||
# tmp = psi2[:, :, :, None] / (2.*S[:,None,None,:] + l2)
|
|
||||||
# grad_mu += -2.*np.einsum('jk,ijkl,ijkl->il', dL_dpsi2, tmp , mudist)
|
|
||||||
# grad_S += np.einsum('jk,ijkl,ijkl->il', dL_dpsi2 , tmp , (2.*mudist_sq - 1))
|
|
||||||
|
|
||||||
return dL_dmu, dL_dS
|
|
||||||
|
|
||||||
else:
|
else:
|
||||||
raise ValueError, "unknown distriubtion received for psi-statistics"
|
return self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)[3:]
|
||||||
|
|
||||||
#return grad_mu, grad_S
|
|
||||||
|
|
||||||
#---------------------------------------#
|
|
||||||
# Precomputations #
|
|
||||||
#---------------------------------------#
|
|
||||||
|
|
||||||
@Cache_this(limit=1)
|
|
||||||
def _psi1computations(self, Z, vp):
|
|
||||||
mu, S = vp.mean, vp.variance
|
|
||||||
l2 = self.lengthscale **2
|
|
||||||
denom = S[:, None, :] / l2 + 1. # N,1,Q
|
|
||||||
dist = Z[None, :, :] - mu[:, None, :] # N,M,Q
|
|
||||||
dist_sq = np.square(dist) / l2 / denom # N,M,Q
|
|
||||||
exponent = -0.5 * np.sum(dist_sq + np.log(denom), -1)#N,M
|
|
||||||
psi1 = self.variance * np.exp(exponent) # N,M
|
|
||||||
return denom, dist, dist_sq, psi1
|
|
||||||
|
|
||||||
|
|
||||||
@Cache_this(limit=1, ignore_args=(0,))
|
|
||||||
def _Z_distances(self, Z):
|
|
||||||
Zhat = 0.5 * (Z[:, None, :] + Z[None, :, :]) # M,M,Q
|
|
||||||
Zdist = 0.5 * (Z[:, None, :] - Z[None, :, :]) # M,M,Q
|
|
||||||
return Zhat, Zdist
|
|
||||||
|
|
||||||
@Cache_this(limit=1)
|
|
||||||
def _psi2computations(self, Z, vp):
|
|
||||||
|
|
||||||
if config.getboolean('parallel', 'openmp'):
|
|
||||||
pragma_string = '#pragma omp parallel for private(tmp, exponent_tmp)'
|
|
||||||
header_string = '#include <omp.h>'
|
|
||||||
libraries = ['gomp']
|
|
||||||
else:
|
|
||||||
pragma_string = ''
|
|
||||||
header_string = ''
|
|
||||||
libraries = []
|
|
||||||
|
|
||||||
mu, S = vp.mean, vp.variance
|
|
||||||
|
|
||||||
N, Q = mu.shape
|
|
||||||
M = Z.shape[0]
|
|
||||||
|
|
||||||
#compute required distances
|
|
||||||
Zhat, Zdist = self._Z_distances(Z)
|
|
||||||
Zdist_sq = np.square(Zdist / self.lengthscale) # M,M,Q
|
|
||||||
|
|
||||||
#allocate memory for the things we want to compute
|
|
||||||
mudist = np.empty((N, M, M, Q))
|
|
||||||
mudist_sq = np.empty((N, M, M, Q))
|
|
||||||
psi2 = np.empty((N, M, M))
|
|
||||||
|
|
||||||
l2 = self.lengthscale **2
|
|
||||||
denom = (2.*S[:,None,None,:] / l2) + 1. # N,Q
|
|
||||||
half_log_denom = 0.5 * np.log(denom[:,0,0,:])
|
|
||||||
denom_l2 = denom[:,0,0,:]*l2
|
|
||||||
|
|
||||||
variance_sq = float(np.square(self.variance))
|
|
||||||
code = """
|
|
||||||
double tmp, exponent_tmp;
|
|
||||||
%s
|
|
||||||
for (int n=0; n<N; n++)
|
|
||||||
{
|
|
||||||
for (int m=0; m<M; m++)
|
|
||||||
{
|
|
||||||
for (int mm=0; mm<(m+1); mm++)
|
|
||||||
{
|
|
||||||
exponent_tmp = 0.0;
|
|
||||||
for (int q=0; q<Q; q++)
|
|
||||||
{
|
|
||||||
//compute mudist
|
|
||||||
tmp = mu(n,q) - Zhat(m,mm,q);
|
|
||||||
mudist(n,m,mm,q) = tmp;
|
|
||||||
mudist(n,mm,m,q) = tmp;
|
|
||||||
|
|
||||||
//now mudist_sq
|
|
||||||
tmp = tmp*tmp/denom_l2(n,q);
|
|
||||||
mudist_sq(n,m,mm,q) = tmp;
|
|
||||||
mudist_sq(n,mm,m,q) = tmp;
|
|
||||||
|
|
||||||
//now exponent
|
|
||||||
tmp = -Zdist_sq(m,mm,q) - tmp - half_log_denom(n,q);
|
|
||||||
exponent_tmp += tmp;
|
|
||||||
}
|
|
||||||
//compute psi2 by exponentiating
|
|
||||||
psi2(n,m,mm) = variance_sq * exp(exponent_tmp);
|
|
||||||
psi2(n,mm,m) = psi2(n,m,mm);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
""" % pragma_string
|
|
||||||
|
|
||||||
support_code = """
|
|
||||||
%s
|
|
||||||
#include <math.h>
|
|
||||||
""" % header_string
|
|
||||||
mu = param_to_array(mu)
|
|
||||||
weave.inline(code, support_code=support_code, libraries=libraries,
|
|
||||||
arg_names=['N', 'M', 'Q', 'mu', 'Zhat', 'mudist_sq', 'mudist', 'denom_l2', 'Zdist_sq', 'half_log_denom', 'psi2', 'variance_sq'],
|
|
||||||
type_converters=weave.converters.blitz, **self.weave_options)
|
|
||||||
|
|
||||||
return Zdist, Zdist_sq, mudist, mudist_sq, psi2
|
|
||||||
|
|
||||||
def _weave_psi2_lengthscale_grads(self, dL_dpsi2, psi2, Zdist_sq, S, mudist_sq, l2):
|
|
||||||
|
|
||||||
#here's the einsum equivalent, it's ~3 times slower
|
|
||||||
#return 2.*np.einsum( 'ijk,ijk,ijkl,il->l', dL_dpsi2, psi2, Zdist_sq * (2.*S[:,None,None,:]/l2 + 1.) + mudist_sq + S[:, None, None, :] / l2, 1./(2.*S + l2))*self.lengthscale
|
|
||||||
|
|
||||||
result = np.zeros(self.input_dim)
|
|
||||||
if config.getboolean('parallel', 'openmp'):
|
|
||||||
pragma_string = '#pragma omp parallel for reduction(+:tmp)'
|
|
||||||
header_string = '#include <omp.h>'
|
|
||||||
libraries = ['gomp']
|
|
||||||
else:
|
|
||||||
pragma_string = ''
|
|
||||||
header_string = ''
|
|
||||||
libraries = []
|
|
||||||
code = """
|
|
||||||
double tmp;
|
|
||||||
for(int q=0; q<Q; q++)
|
|
||||||
{
|
|
||||||
tmp = 0.0;
|
|
||||||
%s
|
|
||||||
for(int n=0; n<N; n++)
|
|
||||||
{
|
|
||||||
for(int m=0; m<M; m++)
|
|
||||||
{
|
|
||||||
//diag terms
|
|
||||||
tmp += dL_dpsi2(n,m,m) * psi2(n,m,m) * (Zdist_sq(m,m,q) * (2.0*S(n,q)/l2(q) + 1.0) + mudist_sq(n,m,m,q) + S(n,q)/l2(q)) / (2.0*S(n,q) + l2(q)) ;
|
|
||||||
|
|
||||||
//off-diag terms
|
|
||||||
for(int mm=0; mm<m; mm++)
|
|
||||||
{
|
|
||||||
tmp += 2.0 * dL_dpsi2(n,m,mm) * psi2(n,m,mm) * (Zdist_sq(m,mm,q) * (2.0*S(n,q)/l2(q) + 1.0) + mudist_sq(n,m,mm,q) + S(n,q)/l2(q)) / (2.0*S(n,q) + l2(q)) ;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
result(q) = tmp;
|
|
||||||
}
|
|
||||||
|
|
||||||
""" % pragma_string
|
|
||||||
support_code = """
|
|
||||||
%s
|
|
||||||
#include <math.h>
|
|
||||||
""" % header_string
|
|
||||||
N,Q = S.shape
|
|
||||||
M = psi2.shape[-1]
|
|
||||||
|
|
||||||
S = param_to_array(S)
|
|
||||||
weave.inline(code, support_code=support_code, libraries=libraries,
|
|
||||||
arg_names=['psi2', 'dL_dpsi2', 'N', 'M', 'Q', 'mudist_sq', 'l2', 'Zdist_sq', 'S', 'result'],
|
|
||||||
type_converters=weave.converters.blitz, **self.weave_options)
|
|
||||||
|
|
||||||
return 2.*result*self.lengthscale
|
|
||||||
|
|
|
||||||
|
|
@ -64,7 +64,6 @@ class SSGPLVM(SparseGP):
|
||||||
|
|
||||||
if kernel is None:
|
if kernel is None:
|
||||||
kernel = kern.RBF(input_dim, lengthscale=fracs, ARD=True) # + kern.white(input_dim)
|
kernel = kern.RBF(input_dim, lengthscale=fracs, ARD=True) # + kern.white(input_dim)
|
||||||
kernel.set_for_SpikeAndSlab()
|
|
||||||
|
|
||||||
if inference_method is None:
|
if inference_method is None:
|
||||||
inference_method = VarDTC_minibatch(mpi_comm=mpi_comm)
|
inference_method = VarDTC_minibatch(mpi_comm=mpi_comm)
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue