From b945c2000467d8217b83170864b22a386d560859 Mon Sep 17 00:00:00 2001 From: Zhenwen Dai Date: Wed, 21 May 2014 16:42:35 +0100 Subject: [PATCH] restructure rbf kernel --- GPy/kern/_src/psi_comp/__init__.py | 23 ++ GPy/kern/_src/psi_comp/rbf_psi_comp.py | 308 +++++++++---------- GPy/kern/_src/psi_comp/ssrbf_psi_comp.py | 368 +++++++++++------------ GPy/kern/_src/rbf.py | 292 ++---------------- GPy/models/ss_gplvm.py | 1 - 5 files changed, 372 insertions(+), 620 deletions(-) diff --git a/GPy/kern/_src/psi_comp/__init__.py b/GPy/kern/_src/psi_comp/__init__.py index 17ea2e83..77668e7f 100644 --- a/GPy/kern/_src/psi_comp/__init__.py +++ b/GPy/kern/_src/psi_comp/__init__.py @@ -2,6 +2,10 @@ # Licensed under the BSD 3-clause license (see LICENSE.txt) from ....core.parameterization.parameter_core import Pickleable +from GPy.util.caching import Cache_this +from ....core.parameterization import variational +import rbf_psi_comp +import ssrbf_psi_comp class PSICOMP(Pickleable): @@ -17,3 +21,22 @@ class PSICOMP(Pickleable): """ pass +class PSICOMP_RBF(Pickleable): + + @Cache_this(limit=1, ignore_args=(0,)) + def psicomputations(self, variance, lengthscale, Z, variational_posterior): + if isinstance(variational_posterior, variational.NormalPosterior): + return rbf_psi_comp.psicomputations(variance, lengthscale, Z, variational_posterior) + elif isinstance(variational_posterior, variational.SpikeAndSlabPosterior): + return ssrbf_psi_comp.psicomputations(variance, lengthscale, Z, variational_posterior) + else: + raise ValueError, "unknown distriubtion received for psi-statistics" + + @Cache_this(limit=1, ignore_args=(0,1,2,3)) + def psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior): + if isinstance(variational_posterior, variational.NormalPosterior): + return rbf_psi_comp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior) + elif isinstance(variational_posterior, variational.SpikeAndSlabPosterior): + return ssrbf_psi_comp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior) + else: + raise ValueError, "unknown distriubtion received for psi-statistics" \ No newline at end of file diff --git a/GPy/kern/_src/psi_comp/rbf_psi_comp.py b/GPy/kern/_src/psi_comp/rbf_psi_comp.py index d5cd8951..93399ea7 100644 --- a/GPy/kern/_src/psi_comp/rbf_psi_comp.py +++ b/GPy/kern/_src/psi_comp/rbf_psi_comp.py @@ -3,168 +3,160 @@ The module for psi-statistics for RBF kernel """ import numpy as np -from . import PSICOMP -from GPy.util.caching import Cache_this -from ....util.misc import param_to_array -from scipy import weave -from ....util.config import * +from GPy.util.caching import Cacher -class PSICOMP_RBF(PSICOMP): +def psicomputations(variance, lengthscale, Z, variational_posterior): + """ + Z - MxQ + mu - NxQ + S - NxQ + gamma - NxQ + """ + # here are the "statistics" for psi0, psi1 and psi2 + # Produced intermediate results: + # _psi1 NxM + mu = variational_posterior.mean + S = variational_posterior.variance - @Cache_this(limit=1, ignore_args=(0,)) - def psicomputations(self, variance, lengthscale, Z, variational_posterior): - """ - Z - MxQ - mu - NxQ - S - NxQ - gamma - NxQ - """ - # here are the "statistics" for psi0, psi1 and psi2 - # Produced intermediate results: - # _psi1 NxM - mu = variational_posterior.mean - S = variational_posterior.variance - - psi0 = np.empty(mu.shape[0]) - psi0[:] = variance - psi1 = self._psi1computations(variance, lengthscale, Z, mu, S) - psi2 = self._psi2computations(variance, lengthscale, Z, mu, S).sum(axis=0) - return psi0, psi1, psi2 - - @Cache_this(limit=1, ignore_args=(0,)) - def _psi1computations(self, variance, lengthscale, Z, mu, S): - """ - Z - MxQ - mu - NxQ - S - NxQ - gamma - NxQ - """ - # here are the "statistics" for psi1 - # Produced intermediate results: - # _psi1 NxM - - lengthscale2 = np.square(lengthscale) - - # psi1 - _psi1_logdenom = np.log(S/lengthscale2+1.).sum(axis=-1) # N - _psi1_log = (_psi1_logdenom[:,None]+np.einsum('nmq,nq->nm',np.square(mu[:,None,:]-Z[None,:,:]),1./(S+lengthscale2)))/(-2.) - _psi1 = variance*np.exp(_psi1_log) - - return _psi1 - - @Cache_this(limit=1, ignore_args=(0,)) - def _psi2computations(self, variance, lengthscale, Z, mu, S): - """ - Z - MxQ - mu - NxQ - S - NxQ - gamma - NxQ - """ - # here are the "statistics" for psi2 - # Produced intermediate results: - # _psi2 MxM - - lengthscale2 = np.square(lengthscale) - - _psi2_logdenom = np.log(2.*S/lengthscale2+1.).sum(axis=-1)/(-2.) # N - _psi2_exp1 = (np.square(Z[:,None,:]-Z[None,:,:])/lengthscale2).sum(axis=-1)/(-4.) #MxM - Z_hat = (Z[:,None,:]+Z[None,:,:])/2. #MxMxQ - denom = 1./(2.*S+lengthscale2) - _psi2_exp2 = -(np.square(mu)*denom).sum(axis=-1)[:,None,None]+2.*np.einsum('nq,moq,nq->nmo',mu,Z_hat,denom)-np.einsum('moq,nq->nmo',np.square(Z_hat),denom) - _psi2 = variance*variance*np.exp(_psi2_logdenom[:,None,None]+_psi2_exp1[None,:,:]+_psi2_exp2) - - - return _psi2 - - @Cache_this(limit=1, ignore_args=(0,1,2,3)) - def psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior): - ARD = (len(lengthscale)!=1) - - dvar_psi1, dl_psi1, dZ_psi1, dmu_psi1, dS_psi1 = self._psi1compDer(dL_dpsi1, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance) - dvar_psi2, dl_psi2, dZ_psi2, dmu_psi2, dS_psi2 = self._psi2compDer(dL_dpsi2, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance) - - dL_dvar = np.sum(dL_dpsi0) + dvar_psi1 + dvar_psi2 - - dL_dlengscale = dl_psi1 + dl_psi2 - if not ARD: - dL_dlengscale = dL_dlengscale.sum() + psi0 = np.empty(mu.shape[0]) + psi0[:] = variance + psi1 = _psi1computations(variance, lengthscale, Z, mu, S) + psi2 = _psi2computations(variance, lengthscale, Z, mu, S).sum(axis=0) + return psi0, psi1, psi2 - dL_dmu = dmu_psi1 + dmu_psi2 - dL_dS = dS_psi1 + dS_psi2 - dL_dZ = dZ_psi1 + dZ_psi2 - - return dL_dvar, dL_dlengscale, dL_dZ, dL_dmu, dL_dS - - def _psi1compDer(self, dL_dpsi1, variance, lengthscale, Z, mu, S): - """ - dL_dpsi1 - NxM - Z - MxQ - mu - NxQ - S - NxQ - gamma - NxQ - """ - # here are the "statistics" for psi1 - # Produced intermediate results: dL_dparams w.r.t. psi1 - # _dL_dvariance 1 - # _dL_dlengthscale Q - # _dL_dZ MxQ - # _dL_dgamma NxQ - # _dL_dmu NxQ - # _dL_dS NxQ - - lengthscale2 = np.square(lengthscale) - - _psi1 = self._psi1computations(variance, lengthscale, Z, mu, S) - Lpsi1 = dL_dpsi1*_psi1 - Zmu = Z[None,:,:]-mu[:,None,:] # NxMxQ - denom = 1./(S+lengthscale2) - Zmu2_denom = np.square(Zmu)*denom[:,None,:] #NxMxQ - _dL_dvar = Lpsi1.sum()/variance - _dL_dmu = np.einsum('nm,nmq,nq->nq',Lpsi1,Zmu,denom) - _dL_dS = np.einsum('nm,nmq,nq->nq',Lpsi1,(Zmu2_denom-1.),denom)/2. - _dL_dZ = -np.einsum('nm,nmq,nq->mq',Lpsi1,Zmu,denom) - _dL_dl = np.einsum('nm,nmq,nq->q',Lpsi1,(Zmu2_denom+(S/lengthscale2)[:,None,:]),denom*lengthscale) - - return _dL_dvar, _dL_dl, _dL_dZ, _dL_dmu, _dL_dS - - def _psi2compDer(self, dL_dpsi2, variance, lengthscale, Z, mu, S): - """ - Z - MxQ - mu - NxQ - S - NxQ - gamma - NxQ - dL_dpsi2 - MxM - """ - # here are the "statistics" for psi2 - # Produced the derivatives w.r.t. psi2: - # _dL_dvariance 1 - # _dL_dlengthscale Q - # _dL_dZ MxQ - # _dL_dgamma NxQ - # _dL_dmu NxQ - # _dL_dS NxQ - - lengthscale2 = np.square(lengthscale) - denom = 1./(2*S+lengthscale2) - denom2 = np.square(denom) +def __psi1computations(variance, lengthscale, Z, mu, S): + """ + Z - MxQ + mu - NxQ + S - NxQ + gamma - NxQ + """ + # here are the "statistics" for psi1 + # Produced intermediate results: + # _psi1 NxM - _psi2 = self._psi2computations(variance, lengthscale, Z, mu, S) # NxMxM + lengthscale2 = np.square(lengthscale) + + # psi1 + _psi1_logdenom = np.log(S/lengthscale2+1.).sum(axis=-1) # N + _psi1_log = (_psi1_logdenom[:,None]+np.einsum('nmq,nq->nm',np.square(mu[:,None,:]-Z[None,:,:]),1./(S+lengthscale2)))/(-2.) + _psi1 = variance*np.exp(_psi1_log) - Lpsi2 = dL_dpsi2[None,:,:]*_psi2 - Lpsi2sum = np.einsum('nmo->n',Lpsi2) #N - Lpsi2Z = np.einsum('nmo,oq->nq',Lpsi2,Z) #NxQ - Lpsi2Z2 = np.einsum('nmo,oq,oq->nq',Lpsi2,Z,Z) #NxQ - Lpsi2Z2p = np.einsum('nmo,mq,oq->nq',Lpsi2,Z,Z) #NxQ - Lpsi2Zhat = Lpsi2Z - Lpsi2Zhat2 = (Lpsi2Z2+Lpsi2Z2p)/2 - - _dL_dvar = Lpsi2sum.sum()*2/variance - _dL_dmu = (-2*denom) * (mu*Lpsi2sum[:,None]-Lpsi2Zhat) - _dL_dS = (2*np.square(denom))*(np.square(mu)*Lpsi2sum[:,None]-2*mu*Lpsi2Zhat+Lpsi2Zhat2) - denom*Lpsi2sum[:,None] - _dL_dZ = -np.einsum('nmo,oq->oq',Lpsi2,Z)/lengthscale2+np.einsum('nmo,oq->mq',Lpsi2,Z)/lengthscale2+ \ - 2*np.einsum('nmo,nq,nq->mq',Lpsi2,mu,denom) - np.einsum('nmo,nq,mq->mq',Lpsi2,denom,Z) - np.einsum('nmo,oq,nq->mq',Lpsi2,Z,denom) - _dL_dl = 2*lengthscale* ((S/lengthscale2*denom+np.square(mu*denom))*Lpsi2sum[:,None]+(Lpsi2Z2-Lpsi2Z2p)/(2*np.square(lengthscale2))- - (2*mu*denom2)*Lpsi2Zhat+denom2*Lpsi2Zhat2).sum(axis=0) + return _psi1 + +def __psi2computations(variance, lengthscale, Z, mu, S): + """ + Z - MxQ + mu - NxQ + S - NxQ + gamma - NxQ + """ + # here are the "statistics" for psi2 + # Produced intermediate results: + # _psi2 MxM - return _dL_dvar, _dL_dl, _dL_dZ, _dL_dmu, _dL_dS + lengthscale2 = np.square(lengthscale) + _psi2_logdenom = np.log(2.*S/lengthscale2+1.).sum(axis=-1)/(-2.) # N + _psi2_exp1 = (np.square(Z[:,None,:]-Z[None,:,:])/lengthscale2).sum(axis=-1)/(-4.) #MxM + Z_hat = (Z[:,None,:]+Z[None,:,:])/2. #MxMxQ + denom = 1./(2.*S+lengthscale2) + _psi2_exp2 = -(np.square(mu)*denom).sum(axis=-1)[:,None,None]+2.*np.einsum('nq,moq,nq->nmo',mu,Z_hat,denom)-np.einsum('moq,nq->nmo',np.square(Z_hat),denom) + _psi2 = variance*variance*np.exp(_psi2_logdenom[:,None,None]+_psi2_exp1[None,:,:]+_psi2_exp2) + + + return _psi2 + +def psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior): + ARD = (len(lengthscale)!=1) + + dvar_psi1, dl_psi1, dZ_psi1, dmu_psi1, dS_psi1 = _psi1compDer(dL_dpsi1, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance) + dvar_psi2, dl_psi2, dZ_psi2, dmu_psi2, dS_psi2 = _psi2compDer(dL_dpsi2, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance) + + dL_dvar = np.sum(dL_dpsi0) + dvar_psi1 + dvar_psi2 + + dL_dlengscale = dl_psi1 + dl_psi2 + if not ARD: + dL_dlengscale = dL_dlengscale.sum() + + dL_dmu = dmu_psi1 + dmu_psi2 + dL_dS = dS_psi1 + dS_psi2 + dL_dZ = dZ_psi1 + dZ_psi2 + + return dL_dvar, dL_dlengscale, dL_dZ, dL_dmu, dL_dS + +def _psi1compDer(dL_dpsi1, variance, lengthscale, Z, mu, S): + """ + dL_dpsi1 - NxM + Z - MxQ + mu - NxQ + S - NxQ + gamma - NxQ + """ + # here are the "statistics" for psi1 + # Produced intermediate results: dL_dparams w.r.t. psi1 + # _dL_dvariance 1 + # _dL_dlengthscale Q + # _dL_dZ MxQ + # _dL_dgamma NxQ + # _dL_dmu NxQ + # _dL_dS NxQ + + lengthscale2 = np.square(lengthscale) + + _psi1 = _psi1computations(variance, lengthscale, Z, mu, S) + Lpsi1 = dL_dpsi1*_psi1 + Zmu = Z[None,:,:]-mu[:,None,:] # NxMxQ + denom = 1./(S+lengthscale2) + Zmu2_denom = np.square(Zmu)*denom[:,None,:] #NxMxQ + _dL_dvar = Lpsi1.sum()/variance + _dL_dmu = np.einsum('nm,nmq,nq->nq',Lpsi1,Zmu,denom) + _dL_dS = np.einsum('nm,nmq,nq->nq',Lpsi1,(Zmu2_denom-1.),denom)/2. + _dL_dZ = -np.einsum('nm,nmq,nq->mq',Lpsi1,Zmu,denom) + _dL_dl = np.einsum('nm,nmq,nq->q',Lpsi1,(Zmu2_denom+(S/lengthscale2)[:,None,:]),denom*lengthscale) + + return _dL_dvar, _dL_dl, _dL_dZ, _dL_dmu, _dL_dS + +def _psi2compDer(dL_dpsi2, variance, lengthscale, Z, mu, S): + """ + Z - MxQ + mu - NxQ + S - NxQ + gamma - NxQ + dL_dpsi2 - MxM + """ + # here are the "statistics" for psi2 + # Produced the derivatives w.r.t. psi2: + # _dL_dvariance 1 + # _dL_dlengthscale Q + # _dL_dZ MxQ + # _dL_dgamma NxQ + # _dL_dmu NxQ + # _dL_dS NxQ + + lengthscale2 = np.square(lengthscale) + denom = 1./(2*S+lengthscale2) + denom2 = np.square(denom) + + _psi2 = _psi2computations(variance, lengthscale, Z, mu, S) # NxMxM + + Lpsi2 = dL_dpsi2[None,:,:]*_psi2 + Lpsi2sum = np.einsum('nmo->n',Lpsi2) #N + Lpsi2Z = np.einsum('nmo,oq->nq',Lpsi2,Z) #NxQ + Lpsi2Z2 = np.einsum('nmo,oq,oq->nq',Lpsi2,Z,Z) #NxQ + Lpsi2Z2p = np.einsum('nmo,mq,oq->nq',Lpsi2,Z,Z) #NxQ + Lpsi2Zhat = Lpsi2Z + Lpsi2Zhat2 = (Lpsi2Z2+Lpsi2Z2p)/2 + + _dL_dvar = Lpsi2sum.sum()*2/variance + _dL_dmu = (-2*denom) * (mu*Lpsi2sum[:,None]-Lpsi2Zhat) + _dL_dS = (2*np.square(denom))*(np.square(mu)*Lpsi2sum[:,None]-2*mu*Lpsi2Zhat+Lpsi2Zhat2) - denom*Lpsi2sum[:,None] + _dL_dZ = -np.einsum('nmo,oq->oq',Lpsi2,Z)/lengthscale2+np.einsum('nmo,oq->mq',Lpsi2,Z)/lengthscale2+ \ + 2*np.einsum('nmo,nq,nq->mq',Lpsi2,mu,denom) - np.einsum('nmo,nq,mq->mq',Lpsi2,denom,Z) - np.einsum('nmo,oq,nq->mq',Lpsi2,Z,denom) + _dL_dl = 2*lengthscale* ((S/lengthscale2*denom+np.square(mu*denom))*Lpsi2sum[:,None]+(Lpsi2Z2-Lpsi2Z2p)/(2*np.square(lengthscale2))- + (2*mu*denom2)*Lpsi2Zhat+denom2*Lpsi2Zhat2).sum(axis=0) + + return _dL_dvar, _dL_dl, _dL_dZ, _dL_dmu, _dL_dS + +_psi1computations = Cacher(__psi1computations, limit=1) +_psi2computations = Cacher(__psi2computations, limit=1) diff --git a/GPy/kern/_src/psi_comp/ssrbf_psi_comp.py b/GPy/kern/_src/psi_comp/ssrbf_psi_comp.py index 0ded9e14..6302a590 100644 --- a/GPy/kern/_src/psi_comp/ssrbf_psi_comp.py +++ b/GPy/kern/_src/psi_comp/ssrbf_psi_comp.py @@ -6,200 +6,194 @@ The package for the psi statistics computation """ import numpy as np -from . import PSICOMP -from GPy.util.caching import Cache_this -class PSICOMP_SSRBF(PSICOMP): +def psicomputations(variance, lengthscale, Z, variational_posterior): + """ + Z - MxQ + mu - NxQ + S - NxQ + gamma - NxQ + """ + # here are the "statistics" for psi0, psi1 and psi2 + # Produced intermediate results: + # _psi1 NxM + mu = variational_posterior.mean + S = variational_posterior.variance + gamma = variational_posterior.binary_prob - @Cache_this(limit=1, ignore_args=(0,)) - def psicomputations(self, variance, lengthscale, Z, variational_posterior): - """ - Z - MxQ - mu - NxQ - S - NxQ - gamma - NxQ - """ - # here are the "statistics" for psi0, psi1 and psi2 - # Produced intermediate results: - # _psi1 NxM - mu = variational_posterior.mean - S = variational_posterior.variance - gamma = variational_posterior.binary_prob - - psi0 = np.empty(mu.shape[0]) - psi0[:] = variance - psi1 = self._psi1computations(variance, lengthscale, Z, mu, S, gamma) - psi2 = self._psi2computations(variance, lengthscale, Z, mu, S, gamma) - return psi0, psi1, psi2 + psi0 = np.empty(mu.shape[0]) + psi0[:] = variance + psi1 = _psi1computations(variance, lengthscale, Z, mu, S, gamma) + psi2 = _psi2computations(variance, lengthscale, Z, mu, S, gamma) + return psi0, psi1, psi2 + +def _psi1computations(variance, lengthscale, Z, mu, S, gamma): + """ + Z - MxQ + mu - NxQ + S - NxQ + gamma - NxQ + """ + # here are the "statistics" for psi1 + # Produced intermediate results: + # _psi1 NxM + + lengthscale2 = np.square(lengthscale) + + # psi1 + _psi1_denom = S[:, None, :] / lengthscale2 + 1. # Nx1xQ + _psi1_denom_sqrt = np.sqrt(_psi1_denom) #Nx1xQ + _psi1_dist = Z[None, :, :] - mu[:, None, :] # NxMxQ + _psi1_dist_sq = np.square(_psi1_dist) / (lengthscale2 * _psi1_denom) # NxMxQ + _psi1_common = gamma[:,None,:] / (lengthscale2*_psi1_denom*_psi1_denom_sqrt) #Nx1xQ + _psi1_exponent1 = np.log(gamma[:,None,:]) - (_psi1_dist_sq + np.log(_psi1_denom))/2. # NxMxQ + _psi1_exponent2 = np.log(1.-gamma[:,None,:]) - (np.square(Z[None,:,:])/lengthscale2)/2. # NxMxQ + _psi1_exponent_max = np.maximum(_psi1_exponent1,_psi1_exponent2) + _psi1_exponent = _psi1_exponent_max+np.log(np.exp(_psi1_exponent1-_psi1_exponent_max) + np.exp(_psi1_exponent2-_psi1_exponent_max)) #NxMxQ + _psi1_exp_sum = _psi1_exponent.sum(axis=-1) #NxM + _psi1 = variance * np.exp(_psi1_exp_sum) # NxM + + return _psi1 + +def _psi2computations(variance, lengthscale, Z, mu, S, gamma): + """ + Z - MxQ + mu - NxQ + S - NxQ + gamma - NxQ + """ + # here are the "statistics" for psi2 + # Produced intermediate results: + # _psi2 MxM - def _psi1computations(self, variance, lengthscale, Z, mu, S, gamma): - """ - Z - MxQ - mu - NxQ - S - NxQ - gamma - NxQ - """ - # here are the "statistics" for psi1 - # Produced intermediate results: - # _psi1 NxM + lengthscale2 = np.square(lengthscale) - lengthscale2 = np.square(lengthscale) + _psi2_Zhat = 0.5 * (Z[:, None, :] + Z[None, :, :]) # M,M,Q + _psi2_Zdist = 0.5 * (Z[:, None, :] - Z[None, :, :]) # M,M,Q + _psi2_Zdist_sq = np.square(_psi2_Zdist / lengthscale) # M,M,Q + _psi2_Z_sq_sum = (np.square(Z[:,None,:])+np.square(Z[None,:,:]))/lengthscale2 # MxMxQ + + # psi2 + _psi2_denom = 2.*S[:, None, None, :] / lengthscale2 + 1. # Nx1x1xQ + _psi2_denom_sqrt = np.sqrt(_psi2_denom) + _psi2_mudist = mu[:,None,None,:]-_psi2_Zhat #N,M,M,Q + _psi2_mudist_sq = np.square(_psi2_mudist)/(lengthscale2*_psi2_denom) + _psi2_common = gamma[:,None,None,:]/(lengthscale2 * _psi2_denom * _psi2_denom_sqrt) # Nx1x1xQ + _psi2_exponent1 = -_psi2_Zdist_sq -_psi2_mudist_sq -0.5*np.log(_psi2_denom)+np.log(gamma[:,None,None,:]) #N,M,M,Q + _psi2_exponent2 = np.log(1.-gamma[:,None,None,:]) - 0.5*(_psi2_Z_sq_sum) # NxMxMxQ + _psi2_exponent_max = np.maximum(_psi2_exponent1, _psi2_exponent2) + _psi2_exponent = _psi2_exponent_max+np.log(np.exp(_psi2_exponent1-_psi2_exponent_max) + np.exp(_psi2_exponent2-_psi2_exponent_max)) + _psi2_exp_sum = _psi2_exponent.sum(axis=-1) #NxM + _psi2 = variance*variance * (np.exp(_psi2_exp_sum).sum(axis=0)) # MxM + + return _psi2 + +def psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior): + ARD = (len(lengthscale)!=1) - # psi1 - _psi1_denom = S[:, None, :] / lengthscale2 + 1. # Nx1xQ - _psi1_denom_sqrt = np.sqrt(_psi1_denom) #Nx1xQ - _psi1_dist = Z[None, :, :] - mu[:, None, :] # NxMxQ - _psi1_dist_sq = np.square(_psi1_dist) / (lengthscale2 * _psi1_denom) # NxMxQ - _psi1_common = gamma[:,None,:] / (lengthscale2*_psi1_denom*_psi1_denom_sqrt) #Nx1xQ - _psi1_exponent1 = np.log(gamma[:,None,:]) - (_psi1_dist_sq + np.log(_psi1_denom))/2. # NxMxQ - _psi1_exponent2 = np.log(1.-gamma[:,None,:]) - (np.square(Z[None,:,:])/lengthscale2)/2. # NxMxQ - _psi1_exponent_max = np.maximum(_psi1_exponent1,_psi1_exponent2) - _psi1_exponent = _psi1_exponent_max+np.log(np.exp(_psi1_exponent1-_psi1_exponent_max) + np.exp(_psi1_exponent2-_psi1_exponent_max)) #NxMxQ - _psi1_exp_sum = _psi1_exponent.sum(axis=-1) #NxM - _psi1 = variance * np.exp(_psi1_exp_sum) # NxM + dvar_psi1, dl_psi1, dZ_psi1, dmu_psi1, dS_psi1, dgamma_psi1 = _psi1compDer(dL_dpsi1, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob) + dvar_psi2, dl_psi2, dZ_psi2, dmu_psi2, dS_psi2, dgamma_psi2 = _psi2compDer(dL_dpsi2, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob) + + dL_dvar = np.sum(dL_dpsi0) + dvar_psi1 + dvar_psi2 - return _psi1 + dL_dlengscale = dl_psi1 + dl_psi2 + if not ARD: + dL_dlengscale = dL_dlengscale.sum() + + dL_dgamma = dgamma_psi1 + dgamma_psi2 + dL_dmu = dmu_psi1 + dmu_psi2 + dL_dS = dS_psi1 + dS_psi2 + dL_dZ = dZ_psi1 + dZ_psi2 - def _psi2computations(self, variance, lengthscale, Z, mu, S, gamma): - """ - Z - MxQ - mu - NxQ - S - NxQ - gamma - NxQ - """ - # here are the "statistics" for psi2 - # Produced intermediate results: - # _psi2 MxM - - lengthscale2 = np.square(lengthscale) - - _psi2_Zhat = 0.5 * (Z[:, None, :] + Z[None, :, :]) # M,M,Q - _psi2_Zdist = 0.5 * (Z[:, None, :] - Z[None, :, :]) # M,M,Q - _psi2_Zdist_sq = np.square(_psi2_Zdist / lengthscale) # M,M,Q - _psi2_Z_sq_sum = (np.square(Z[:,None,:])+np.square(Z[None,:,:]))/lengthscale2 # MxMxQ + return dL_dvar, dL_dlengscale, dL_dZ, dL_dmu, dL_dS, dL_dgamma + +def _psi1compDer(dL_dpsi1, variance, lengthscale, Z, mu, S, gamma): + """ + dL_dpsi1 - NxM + Z - MxQ + mu - NxQ + S - NxQ + gamma - NxQ + """ + # here are the "statistics" for psi1 + # Produced intermediate results: dL_dparams w.r.t. psi1 + # _dL_dvariance 1 + # _dL_dlengthscale Q + # _dL_dZ MxQ + # _dL_dgamma NxQ + # _dL_dmu NxQ + # _dL_dS NxQ - # psi2 - _psi2_denom = 2.*S[:, None, None, :] / lengthscale2 + 1. # Nx1x1xQ - _psi2_denom_sqrt = np.sqrt(_psi2_denom) - _psi2_mudist = mu[:,None,None,:]-_psi2_Zhat #N,M,M,Q - _psi2_mudist_sq = np.square(_psi2_mudist)/(lengthscale2*_psi2_denom) - _psi2_common = gamma[:,None,None,:]/(lengthscale2 * _psi2_denom * _psi2_denom_sqrt) # Nx1x1xQ - _psi2_exponent1 = -_psi2_Zdist_sq -_psi2_mudist_sq -0.5*np.log(_psi2_denom)+np.log(gamma[:,None,None,:]) #N,M,M,Q - _psi2_exponent2 = np.log(1.-gamma[:,None,None,:]) - 0.5*(_psi2_Z_sq_sum) # NxMxMxQ - _psi2_exponent_max = np.maximum(_psi2_exponent1, _psi2_exponent2) - _psi2_exponent = _psi2_exponent_max+np.log(np.exp(_psi2_exponent1-_psi2_exponent_max) + np.exp(_psi2_exponent2-_psi2_exponent_max)) - _psi2_exp_sum = _psi2_exponent.sum(axis=-1) #NxM - _psi2 = variance*variance * (np.exp(_psi2_exp_sum).sum(axis=0)) # MxM + lengthscale2 = np.square(lengthscale) + + # psi1 + _psi1_denom = S / lengthscale2 + 1. # NxQ + _psi1_denom_sqrt = np.sqrt(_psi1_denom) #NxQ + _psi1_dist = Z[None, :, :] - mu[:, None, :] # NxMxQ + _psi1_dist_sq = np.square(_psi1_dist) / (lengthscale2 * _psi1_denom[:,None,:]) # NxMxQ + _psi1_common = gamma / (lengthscale2*_psi1_denom*_psi1_denom_sqrt) #NxQ + _psi1_exponent1 = np.log(gamma[:,None,:]) -0.5 * (_psi1_dist_sq + np.log(_psi1_denom[:, None,:])) # NxMxQ + _psi1_exponent2 = np.log(1.-gamma[:,None,:]) -0.5 * (np.square(Z[None,:,:])/lengthscale2) # NxMxQ + _psi1_exponent_max = np.maximum(_psi1_exponent1,_psi1_exponent2) + _psi1_exponent = _psi1_exponent_max+np.log(np.exp(_psi1_exponent1-_psi1_exponent_max) + np.exp(_psi1_exponent2-_psi1_exponent_max)) #NxMxQ + _psi1_exp_sum = _psi1_exponent.sum(axis=-1) #NxM + _psi1_exp_dist_sq = np.exp(-0.5*_psi1_dist_sq) # NxMxQ + _psi1_exp_Z = np.exp(-0.5*np.square(Z[None,:,:])/lengthscale2) # 1xMxQ + _psi1_q = variance * np.exp(_psi1_exp_sum[:,:,None] - _psi1_exponent) # NxMxQ + _psi1 = variance * np.exp(_psi1_exp_sum) # NxM + _dL_dvariance = np.einsum('nm,nm->',dL_dpsi1, _psi1)/variance # 1 + _dL_dgamma = np.einsum('nm,nmq,nmq->nq',dL_dpsi1, _psi1_q, (_psi1_exp_dist_sq/_psi1_denom_sqrt[:,None,:]-_psi1_exp_Z)) # NxQ + _dL_dmu = np.einsum('nm, nmq, nmq, nmq, nq->nq',dL_dpsi1,_psi1_q,_psi1_exp_dist_sq,_psi1_dist,_psi1_common) # NxQ + _dL_dS = np.einsum('nm,nmq,nmq,nq,nmq->nq',dL_dpsi1,_psi1_q,_psi1_exp_dist_sq,_psi1_common,(_psi1_dist_sq-1.))/2. # NxQ + _dL_dZ = np.einsum('nm,nmq,nmq->mq',dL_dpsi1,_psi1_q, (- _psi1_common[:,None,:] * _psi1_dist * _psi1_exp_dist_sq - (1-gamma[:,None,:])/lengthscale2*Z[None,:,:]*_psi1_exp_Z)) + _dL_dlengthscale = lengthscale* np.einsum('nm,nmq,nmq->q',dL_dpsi1,_psi1_q,(_psi1_common[:,None,:]*(S[:,None,:]/lengthscale2+_psi1_dist_sq)*_psi1_exp_dist_sq + (1-gamma[:,None,:])*np.square(Z[None,:,:]/lengthscale2)*_psi1_exp_Z)) + + return _dL_dvariance, _dL_dlengthscale, _dL_dZ, _dL_dmu, _dL_dS, _dL_dgamma + +def _psi2compDer(dL_dpsi2, variance, lengthscale, Z, mu, S, gamma): + """ + Z - MxQ + mu - NxQ + S - NxQ + gamma - NxQ + dL_dpsi2 - MxM + """ + # here are the "statistics" for psi2 + # Produced the derivatives w.r.t. psi2: + # _dL_dvariance 1 + # _dL_dlengthscale Q + # _dL_dZ MxQ + # _dL_dgamma NxQ + # _dL_dmu NxQ + # _dL_dS NxQ - return _psi2 + lengthscale2 = np.square(lengthscale) - @Cache_this(limit=1, ignore_args=(0,1,2,3)) - def psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior): - ARD = (len(lengthscale)!=1) - - dvar_psi1, dl_psi1, dZ_psi1, dmu_psi1, dS_psi1, dgamma_psi1 = self._psi1compDer(dL_dpsi1, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob) - dvar_psi2, dl_psi2, dZ_psi2, dmu_psi2, dS_psi2, dgamma_psi2 = self._psi2compDer(dL_dpsi2, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob) - - dL_dvar = np.sum(dL_dpsi0) + dvar_psi1 + dvar_psi2 - - dL_dlengscale = dl_psi1 + dl_psi2 - if not ARD: - dL_dlengscale = dL_dlengscale.sum() - - dL_dgamma = dgamma_psi1 + dgamma_psi2 - dL_dmu = dmu_psi1 + dmu_psi2 - dL_dS = dS_psi1 + dS_psi2 - dL_dZ = dZ_psi1 + dZ_psi2 - - return dL_dvar, dL_dlengscale, dL_dZ, dL_dmu, dL_dS, dL_dgamma - - def _psi1compDer(self, dL_dpsi1, variance, lengthscale, Z, mu, S, gamma): - """ - dL_dpsi1 - NxM - Z - MxQ - mu - NxQ - S - NxQ - gamma - NxQ - """ - # here are the "statistics" for psi1 - # Produced intermediate results: dL_dparams w.r.t. psi1 - # _dL_dvariance 1 - # _dL_dlengthscale Q - # _dL_dZ MxQ - # _dL_dgamma NxQ - # _dL_dmu NxQ - # _dL_dS NxQ - - lengthscale2 = np.square(lengthscale) - - # psi1 - _psi1_denom = S / lengthscale2 + 1. # NxQ - _psi1_denom_sqrt = np.sqrt(_psi1_denom) #NxQ - _psi1_dist = Z[None, :, :] - mu[:, None, :] # NxMxQ - _psi1_dist_sq = np.square(_psi1_dist) / (lengthscale2 * _psi1_denom[:,None,:]) # NxMxQ - _psi1_common = gamma / (lengthscale2*_psi1_denom*_psi1_denom_sqrt) #NxQ - _psi1_exponent1 = np.log(gamma[:,None,:]) -0.5 * (_psi1_dist_sq + np.log(_psi1_denom[:, None,:])) # NxMxQ - _psi1_exponent2 = np.log(1.-gamma[:,None,:]) -0.5 * (np.square(Z[None,:,:])/lengthscale2) # NxMxQ - _psi1_exponent_max = np.maximum(_psi1_exponent1,_psi1_exponent2) - _psi1_exponent = _psi1_exponent_max+np.log(np.exp(_psi1_exponent1-_psi1_exponent_max) + np.exp(_psi1_exponent2-_psi1_exponent_max)) #NxMxQ - _psi1_exp_sum = _psi1_exponent.sum(axis=-1) #NxM - _psi1_exp_dist_sq = np.exp(-0.5*_psi1_dist_sq) # NxMxQ - _psi1_exp_Z = np.exp(-0.5*np.square(Z[None,:,:])/lengthscale2) # 1xMxQ - _psi1_q = variance * np.exp(_psi1_exp_sum[:,:,None] - _psi1_exponent) # NxMxQ - _psi1 = variance * np.exp(_psi1_exp_sum) # NxM - _dL_dvariance = np.einsum('nm,nm->',dL_dpsi1, _psi1)/variance # 1 - _dL_dgamma = np.einsum('nm,nmq,nmq->nq',dL_dpsi1, _psi1_q, (_psi1_exp_dist_sq/_psi1_denom_sqrt[:,None,:]-_psi1_exp_Z)) # NxQ - _dL_dmu = np.einsum('nm, nmq, nmq, nmq, nq->nq',dL_dpsi1,_psi1_q,_psi1_exp_dist_sq,_psi1_dist,_psi1_common) # NxQ - _dL_dS = np.einsum('nm,nmq,nmq,nq,nmq->nq',dL_dpsi1,_psi1_q,_psi1_exp_dist_sq,_psi1_common,(_psi1_dist_sq-1.))/2. # NxQ - _dL_dZ = np.einsum('nm,nmq,nmq->mq',dL_dpsi1,_psi1_q, (- _psi1_common[:,None,:] * _psi1_dist * _psi1_exp_dist_sq - (1-gamma[:,None,:])/lengthscale2*Z[None,:,:]*_psi1_exp_Z)) - _dL_dlengthscale = lengthscale* np.einsum('nm,nmq,nmq->q',dL_dpsi1,_psi1_q,(_psi1_common[:,None,:]*(S[:,None,:]/lengthscale2+_psi1_dist_sq)*_psi1_exp_dist_sq + (1-gamma[:,None,:])*np.square(Z[None,:,:]/lengthscale2)*_psi1_exp_Z)) - - return _dL_dvariance, _dL_dlengthscale, _dL_dZ, _dL_dmu, _dL_dS, _dL_dgamma - - def _psi2compDer(self, dL_dpsi2, variance, lengthscale, Z, mu, S, gamma): - """ - Z - MxQ - mu - NxQ - S - NxQ - gamma - NxQ - dL_dpsi2 - MxM - """ - # here are the "statistics" for psi2 - # Produced the derivatives w.r.t. psi2: - # _dL_dvariance 1 - # _dL_dlengthscale Q - # _dL_dZ MxQ - # _dL_dgamma NxQ - # _dL_dmu NxQ - # _dL_dS NxQ - - lengthscale2 = np.square(lengthscale) - - _psi2_Zhat = 0.5 * (Z[:, None, :] + Z[None, :, :]) # M,M,Q - _psi2_Zdist = 0.5 * (Z[:, None, :] - Z[None, :, :]) # M,M,Q - _psi2_Zdist_sq = np.square(_psi2_Zdist / lengthscale) # M,M,Q - _psi2_Z_sq_sum = (np.square(Z[:,None,:])+np.square(Z[None,:,:]))/lengthscale2 # MxMxQ - - # psi2 - _psi2_denom = 2.*S / lengthscale2 + 1. # NxQ - _psi2_denom_sqrt = np.sqrt(_psi2_denom) - _psi2_mudist = mu[:,None,None,:]-_psi2_Zhat #N,M,M,Q - _psi2_mudist_sq = np.square(_psi2_mudist)/(lengthscale2*_psi2_denom[:,None,None,:]) - _psi2_common = gamma/(lengthscale2 * _psi2_denom * _psi2_denom_sqrt) # NxQ - _psi2_exponent1 = -_psi2_Zdist_sq -_psi2_mudist_sq -0.5*np.log(_psi2_denom[:,None,None,:])+np.log(gamma[:,None,None,:]) #N,M,M,Q - _psi2_exponent2 = np.log(1.-gamma[:,None,None,:]) - 0.5*(_psi2_Z_sq_sum) # NxMxMxQ - _psi2_exponent_max = np.maximum(_psi2_exponent1, _psi2_exponent2) - _psi2_exponent = _psi2_exponent_max+np.log(np.exp(_psi2_exponent1-_psi2_exponent_max) + np.exp(_psi2_exponent2-_psi2_exponent_max)) - _psi2_exp_sum = _psi2_exponent.sum(axis=-1) #NxM - _psi2_q = variance*variance * np.exp(_psi2_exp_sum[:,:,:,None]-_psi2_exponent) # NxMxMxQ - _psi2_exp_dist_sq = np.exp(-_psi2_Zdist_sq -_psi2_mudist_sq) # NxMxMxQ - _psi2_exp_Z = np.exp(-0.5*_psi2_Z_sq_sum) # MxMxQ - _psi2 = variance*variance * (np.exp(_psi2_exp_sum).sum(axis=0)) # MxM - _dL_dvariance = np.einsum('mo,mo->',dL_dpsi2,_psi2)*2./variance - _dL_dgamma = np.einsum('mo,nmoq,nmoq->nq',dL_dpsi2,_psi2_q,(_psi2_exp_dist_sq/_psi2_denom_sqrt[:,None,None,:] - _psi2_exp_Z)) - _dL_dmu = -2.*np.einsum('mo,nmoq,nq,nmoq,nmoq->nq',dL_dpsi2,_psi2_q,_psi2_common,_psi2_mudist,_psi2_exp_dist_sq) - _dL_dS = np.einsum('mo,nmoq,nq,nmoq,nmoq->nq',dL_dpsi2,_psi2_q, _psi2_common, (2.*_psi2_mudist_sq-1.), _psi2_exp_dist_sq) - _dL_dZ = 2.*np.einsum('mo,nmoq,nmoq->mq',dL_dpsi2,_psi2_q,(_psi2_common[:,None,None,:]*(-_psi2_Zdist*_psi2_denom[:,None,None,:]+_psi2_mudist)*_psi2_exp_dist_sq - (1-gamma[:,None,None,:])*Z[:,None,:]/lengthscale2*_psi2_exp_Z)) - _dL_dlengthscale = 2.*lengthscale* np.einsum('mo,nmoq,nmoq->q',dL_dpsi2,_psi2_q,(_psi2_common[:,None,None,:]*(S[:,None,None,:]/lengthscale2+_psi2_Zdist_sq*_psi2_denom[:,None,None,:]+_psi2_mudist_sq)*_psi2_exp_dist_sq+(1-gamma[:,None,None,:])*_psi2_Z_sq_sum*0.5/lengthscale2*_psi2_exp_Z)) - - return _dL_dvariance, _dL_dlengthscale, _dL_dZ, _dL_dmu, _dL_dS, _dL_dgamma + _psi2_Zhat = 0.5 * (Z[:, None, :] + Z[None, :, :]) # M,M,Q + _psi2_Zdist = 0.5 * (Z[:, None, :] - Z[None, :, :]) # M,M,Q + _psi2_Zdist_sq = np.square(_psi2_Zdist / lengthscale) # M,M,Q + _psi2_Z_sq_sum = (np.square(Z[:,None,:])+np.square(Z[None,:,:]))/lengthscale2 # MxMxQ + + # psi2 + _psi2_denom = 2.*S / lengthscale2 + 1. # NxQ + _psi2_denom_sqrt = np.sqrt(_psi2_denom) + _psi2_mudist = mu[:,None,None,:]-_psi2_Zhat #N,M,M,Q + _psi2_mudist_sq = np.square(_psi2_mudist)/(lengthscale2*_psi2_denom[:,None,None,:]) + _psi2_common = gamma/(lengthscale2 * _psi2_denom * _psi2_denom_sqrt) # NxQ + _psi2_exponent1 = -_psi2_Zdist_sq -_psi2_mudist_sq -0.5*np.log(_psi2_denom[:,None,None,:])+np.log(gamma[:,None,None,:]) #N,M,M,Q + _psi2_exponent2 = np.log(1.-gamma[:,None,None,:]) - 0.5*(_psi2_Z_sq_sum) # NxMxMxQ + _psi2_exponent_max = np.maximum(_psi2_exponent1, _psi2_exponent2) + _psi2_exponent = _psi2_exponent_max+np.log(np.exp(_psi2_exponent1-_psi2_exponent_max) + np.exp(_psi2_exponent2-_psi2_exponent_max)) + _psi2_exp_sum = _psi2_exponent.sum(axis=-1) #NxM + _psi2_q = variance*variance * np.exp(_psi2_exp_sum[:,:,:,None]-_psi2_exponent) # NxMxMxQ + _psi2_exp_dist_sq = np.exp(-_psi2_Zdist_sq -_psi2_mudist_sq) # NxMxMxQ + _psi2_exp_Z = np.exp(-0.5*_psi2_Z_sq_sum) # MxMxQ + _psi2 = variance*variance * (np.exp(_psi2_exp_sum).sum(axis=0)) # MxM + _dL_dvariance = np.einsum('mo,mo->',dL_dpsi2,_psi2)*2./variance + _dL_dgamma = np.einsum('mo,nmoq,nmoq->nq',dL_dpsi2,_psi2_q,(_psi2_exp_dist_sq/_psi2_denom_sqrt[:,None,None,:] - _psi2_exp_Z)) + _dL_dmu = -2.*np.einsum('mo,nmoq,nq,nmoq,nmoq->nq',dL_dpsi2,_psi2_q,_psi2_common,_psi2_mudist,_psi2_exp_dist_sq) + _dL_dS = np.einsum('mo,nmoq,nq,nmoq,nmoq->nq',dL_dpsi2,_psi2_q, _psi2_common, (2.*_psi2_mudist_sq-1.), _psi2_exp_dist_sq) + _dL_dZ = 2.*np.einsum('mo,nmoq,nmoq->mq',dL_dpsi2,_psi2_q,(_psi2_common[:,None,None,:]*(-_psi2_Zdist*_psi2_denom[:,None,None,:]+_psi2_mudist)*_psi2_exp_dist_sq - (1-gamma[:,None,None,:])*Z[:,None,:]/lengthscale2*_psi2_exp_Z)) + _dL_dlengthscale = 2.*lengthscale* np.einsum('mo,nmoq,nmoq->q',dL_dpsi2,_psi2_q,(_psi2_common[:,None,None,:]*(S[:,None,None,:]/lengthscale2+_psi2_Zdist_sq*_psi2_denom[:,None,None,:]+_psi2_mudist_sq)*_psi2_exp_dist_sq+(1-gamma[:,None,None,:])*_psi2_Z_sq_sum*0.5/lengthscale2*_psi2_exp_Z)) + + return _dL_dvariance, _dL_dlengthscale, _dL_dZ, _dL_dmu, _dL_dS, _dL_dgamma diff --git a/GPy/kern/_src/rbf.py b/GPy/kern/_src/rbf.py index 99692b0a..221ca593 100644 --- a/GPy/kern/_src/rbf.py +++ b/GPy/kern/_src/rbf.py @@ -8,7 +8,7 @@ from ...util.misc import param_to_array from stationary import Stationary from GPy.util.caching import Cache_this from ...core.parameterization import variational -from psi_comp import ssrbf_psi_comp,ssrbf_psi_gpucomp,rbf_psi_comp +from psi_comp import PSICOMP_RBF from ...util.config import * class RBF(Stationary): @@ -25,13 +25,7 @@ class RBF(Stationary): super(RBF, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name, useGPU=useGPU) self.weave_options = {} self.group_spike_prob = False - self.psicomp = rbf_psi_comp.PSICOMP_RBF() - - def set_for_SpikeAndSlab(self): - if self.useGPU: - self.psicomp = ssrbf_psi_gpucomp.PSICOMP_SSRBF() - else: - self.psicomp = ssrbf_psi_comp.PSICOMP_SSRBF() + self.psicomp = PSICOMP_RBF() def K_of_r(self, r): return self.variance * np.exp(-0.5 * r**2) @@ -44,289 +38,39 @@ class RBF(Stationary): #---------------------------------------# def psi0(self, Z, variational_posterior): - if isinstance(variational_posterior, variational.SpikeAndSlabPosterior): - if self.useGPU: - return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)[0] - else: - return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior)[0] + if self.useGPU: + return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)[0] else: -# return self.Kdiag(variational_posterior.mean) return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior)[0] def psi1(self, Z, variational_posterior): - if isinstance(variational_posterior, variational.SpikeAndSlabPosterior): - if self.useGPU: - return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)[1] - else: - return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior)[1] + if self.useGPU: + return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)[1] else: return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior)[1] def psi2(self, Z, variational_posterior): - if isinstance(variational_posterior, variational.SpikeAndSlabPosterior): - if self.useGPU: - return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)[2] - else: - return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior)[2] + if self.useGPU: + return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)[2] else: return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior)[2] def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): - # Spike-and-Slab GPLVM - if isinstance(variational_posterior, variational.SpikeAndSlabPosterior): - if self.useGPU: - self.psicomp.update_gradients_expectations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior) - else: - dL_dvar, dL_dlengscale, _, _, _, _ = self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior) - self.variance.gradient = dL_dvar - self.lengthscale.gradient = dL_dlengscale - - elif isinstance(variational_posterior, variational.NormalPosterior): - dL_dvar, dL_dlengscale, _, _, _ = self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior) + if self.useGPU: + self.psicomp.update_gradients_expectations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior) + else: + dL_dvar, dL_dlengscale = self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)[:2] self.variance.gradient = dL_dvar self.lengthscale.gradient = dL_dlengscale -# l2 = self.lengthscale**2 -# if l2.size != self.input_dim: -# l2 = l2*np.ones(self.input_dim) -# #contributions from psi0: -# self.variance.gradient = np.sum(dL_dpsi0) -# self.lengthscale.gradient = 0. -# -# # #from psi1 -# denom, _, dist_sq, psi1 = self._psi1computations(Z, variational_posterior) -# d_length = psi1[:,:,None] * ((dist_sq - 1.)/(self.lengthscale*denom) +1./self.lengthscale) -# dpsi1_dlength = d_length * dL_dpsi1[:, :, None] -# print dpsi1_dlength.sum(0).sum(0) -# if self.ARD: -# self.lengthscale.gradient += dpsi1_dlength.sum(0).sum(0) -# else: -# self.lengthscale.gradient += dpsi1_dlength.sum() -# self.variance.gradient += np.sum(dL_dpsi1 * psi1) / self.variance -# #from psi2 -# S = variational_posterior.variance -# _, Zdist_sq, _, mudist_sq, psi2 = self._psi2computations(Z, variational_posterior) -# if not self.ARD: -# self.lengthscale.gradient += self._weave_psi2_lengthscale_grads(dL_dpsi2, psi2, Zdist_sq, S, mudist_sq, l2).sum() -# else: -# self.lengthscale.gradient += self._weave_psi2_lengthscale_grads(dL_dpsi2, psi2, Zdist_sq, S, mudist_sq, l2) -# # print self._weave_psi2_lengthscale_grads(dL_dpsi2, psi2, Zdist_sq, S, mudist_sq, l2) -# -# self.variance.gradient += 2.*np.sum(dL_dpsi2 * psi2)/self.variance - - else: - raise ValueError, "unknown distriubtion received for psi-statistics" - def gradients_Z_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): - # Spike-and-Slab GPLVM - if isinstance(variational_posterior, variational.SpikeAndSlabPosterior): - if self.useGPU: - return self.psicomp.gradients_Z_expectations(dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior) - else: - _, _, dL_dZ, _, _, _ = self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior) - return dL_dZ - - elif isinstance(variational_posterior, variational.NormalPosterior): - _, _, dL_dZ, _, _ = self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior) - return dL_dZ -# -# l2 = self.lengthscale **2 -# -# #psi1 -# denom, dist, dist_sq, psi1 = self._psi1computations(Z, variational_posterior) -# grad = np.einsum('ij,ij,ijk,ijk->jk', dL_dpsi1, psi1, dist, -1./(denom*l2)) -# -# #psi2 -# Zdist, Zdist_sq, mudist, mudist_sq, psi2 = self._psi2computations(Z, variational_posterior) -# term1 = Zdist / l2 # M, M, Q -# S = variational_posterior.variance -# term2 = mudist / (2.*S[:,None,None,:] + l2) # N, M, M, Q -# -# grad += 2.*np.einsum('ijk,ijk,ijkl->kl', dL_dpsi2, psi2, term1[None,:,:,:] + term2) -# -# return grad + if self.useGPU: + return self.psicomp.gradients_Z_expectations(dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior) else: - raise ValueError, "unknown distriubtion received for psi-statistics" + return self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)[2] def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): - # Spike-and-Slab GPLVM - if isinstance(variational_posterior, variational.SpikeAndSlabPosterior): - if self.useGPU: - return self.psicomp.gradients_qX_expectations(dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior) - else: - _, _, _, dL_dmu, dL_dS, dL_dgamma = self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior) - return dL_dmu, dL_dS, dL_dgamma - - elif isinstance(variational_posterior, variational.NormalPosterior): - _, _, _, dL_dmu, dL_dS = self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior) - -# l2 = self.lengthscale **2 -# #psi1 -# denom, dist, dist_sq, psi1 = self._psi1computations(Z, variational_posterior) -# tmp = psi1[:, :, None] / l2 / denom -# grad_mu = np.sum(dL_dpsi1[:, :, None] * tmp * dist, 1) -# grad_S = np.sum(dL_dpsi1[:, :, None] * 0.5 * tmp * (dist_sq - 1), 1) -# #psi2 -# _, _, mudist, mudist_sq, psi2 = self._psi2computations(Z, variational_posterior) -# S = variational_posterior.variance -# tmp = psi2[:, :, :, None] / (2.*S[:,None,None,:] + l2) -# grad_mu += -2.*np.einsum('jk,ijkl,ijkl->il', dL_dpsi2, tmp , mudist) -# grad_S += np.einsum('jk,ijkl,ijkl->il', dL_dpsi2 , tmp , (2.*mudist_sq - 1)) - - return dL_dmu, dL_dS - + if self.useGPU: + return self.psicomp.gradients_qX_expectations(dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior) else: - raise ValueError, "unknown distriubtion received for psi-statistics" - - #return grad_mu, grad_S - - #---------------------------------------# - # Precomputations # - #---------------------------------------# - - @Cache_this(limit=1) - def _psi1computations(self, Z, vp): - mu, S = vp.mean, vp.variance - l2 = self.lengthscale **2 - denom = S[:, None, :] / l2 + 1. # N,1,Q - dist = Z[None, :, :] - mu[:, None, :] # N,M,Q - dist_sq = np.square(dist) / l2 / denom # N,M,Q - exponent = -0.5 * np.sum(dist_sq + np.log(denom), -1)#N,M - psi1 = self.variance * np.exp(exponent) # N,M - return denom, dist, dist_sq, psi1 - - - @Cache_this(limit=1, ignore_args=(0,)) - def _Z_distances(self, Z): - Zhat = 0.5 * (Z[:, None, :] + Z[None, :, :]) # M,M,Q - Zdist = 0.5 * (Z[:, None, :] - Z[None, :, :]) # M,M,Q - return Zhat, Zdist - - @Cache_this(limit=1) - def _psi2computations(self, Z, vp): - - if config.getboolean('parallel', 'openmp'): - pragma_string = '#pragma omp parallel for private(tmp, exponent_tmp)' - header_string = '#include ' - libraries = ['gomp'] - else: - pragma_string = '' - header_string = '' - libraries = [] - - mu, S = vp.mean, vp.variance - - N, Q = mu.shape - M = Z.shape[0] - - #compute required distances - Zhat, Zdist = self._Z_distances(Z) - Zdist_sq = np.square(Zdist / self.lengthscale) # M,M,Q - - #allocate memory for the things we want to compute - mudist = np.empty((N, M, M, Q)) - mudist_sq = np.empty((N, M, M, Q)) - psi2 = np.empty((N, M, M)) - - l2 = self.lengthscale **2 - denom = (2.*S[:,None,None,:] / l2) + 1. # N,Q - half_log_denom = 0.5 * np.log(denom[:,0,0,:]) - denom_l2 = denom[:,0,0,:]*l2 - - variance_sq = float(np.square(self.variance)) - code = """ - double tmp, exponent_tmp; - %s - for (int n=0; n - """ % header_string - mu = param_to_array(mu) - weave.inline(code, support_code=support_code, libraries=libraries, - arg_names=['N', 'M', 'Q', 'mu', 'Zhat', 'mudist_sq', 'mudist', 'denom_l2', 'Zdist_sq', 'half_log_denom', 'psi2', 'variance_sq'], - type_converters=weave.converters.blitz, **self.weave_options) - - return Zdist, Zdist_sq, mudist, mudist_sq, psi2 - - def _weave_psi2_lengthscale_grads(self, dL_dpsi2, psi2, Zdist_sq, S, mudist_sq, l2): - - #here's the einsum equivalent, it's ~3 times slower - #return 2.*np.einsum( 'ijk,ijk,ijkl,il->l', dL_dpsi2, psi2, Zdist_sq * (2.*S[:,None,None,:]/l2 + 1.) + mudist_sq + S[:, None, None, :] / l2, 1./(2.*S + l2))*self.lengthscale - - result = np.zeros(self.input_dim) - if config.getboolean('parallel', 'openmp'): - pragma_string = '#pragma omp parallel for reduction(+:tmp)' - header_string = '#include ' - libraries = ['gomp'] - else: - pragma_string = '' - header_string = '' - libraries = [] - code = """ - double tmp; - for(int q=0; q - """ % header_string - N,Q = S.shape - M = psi2.shape[-1] - - S = param_to_array(S) - weave.inline(code, support_code=support_code, libraries=libraries, - arg_names=['psi2', 'dL_dpsi2', 'N', 'M', 'Q', 'mudist_sq', 'l2', 'Zdist_sq', 'S', 'result'], - type_converters=weave.converters.blitz, **self.weave_options) - - return 2.*result*self.lengthscale + return self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)[3:] diff --git a/GPy/models/ss_gplvm.py b/GPy/models/ss_gplvm.py index b0fe48b1..b10991ab 100644 --- a/GPy/models/ss_gplvm.py +++ b/GPy/models/ss_gplvm.py @@ -64,7 +64,6 @@ class SSGPLVM(SparseGP): if kernel is None: kernel = kern.RBF(input_dim, lengthscale=fracs, ARD=True) # + kern.white(input_dim) - kernel.set_for_SpikeAndSlab() if inference_method is None: inference_method = VarDTC_minibatch(mpi_comm=mpi_comm)