From e6d07ad5acb4b047e5dc28014fbe7822b8d9e6ee Mon Sep 17 00:00:00 2001 From: Zhenwen Dai Date: Fri, 16 May 2014 11:23:44 +0100 Subject: [PATCH] fix pickle --- .../var_dtc_parallel.py | 7 +- GPy/kern/_src/linear.py | 6 +- GPy/kern/_src/psi_comp/__init__.py | 17 + GPy/kern/_src/psi_comp/linear_psi_comp.py | 12 +- GPy/kern/_src/psi_comp/ssrbf_psi_comp.py | 374 +++++++++--------- GPy/kern/_src/rbf.py | 66 +--- GPy/models/ss_gplvm.py | 23 +- 7 files changed, 230 insertions(+), 275 deletions(-) diff --git a/GPy/inference/latent_function_inference/var_dtc_parallel.py b/GPy/inference/latent_function_inference/var_dtc_parallel.py index 44babf25..7b11085e 100644 --- a/GPy/inference/latent_function_inference/var_dtc_parallel.py +++ b/GPy/inference/latent_function_inference/var_dtc_parallel.py @@ -26,7 +26,7 @@ class VarDTC_minibatch(LatentFunctionInference): """ const_jitter = 1e-6 - def __init__(self, batchsize, limit=1, mpi_comm=None): + def __init__(self, batchsize=None, limit=1, mpi_comm=None): self.batchsize = batchsize self.mpi_comm = mpi_comm @@ -74,9 +74,12 @@ class VarDTC_minibatch(LatentFunctionInference): return jitchol(tdot(Y)) def gatherPsiStat(self, kern, X, Z, Y, beta, uncertain_inputs, het_noise): - + num_inducing = Z.shape[0] num_data, output_dim = Y.shape + + if self.batchsize == None or self.batchsize>num_data: + self.batchsize = num_data trYYT = self.get_trYYT(Y) diff --git a/GPy/kern/_src/linear.py b/GPy/kern/_src/linear.py index 7a2f899f..649c1222 100644 --- a/GPy/kern/_src/linear.py +++ b/GPy/kern/_src/linear.py @@ -111,20 +111,20 @@ class Linear(Kern): def psi0(self, Z, variational_posterior): if isinstance(variational_posterior, variational.SpikeAndSlabPosterior): - return self.psicomp.psicomputations(self.variances, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)[0] + return self.psicomp.psicomputations(self.variances, Z, variational_posterior)[0] else: return np.sum(self.variances * self._mu2S(variational_posterior), 1) def psi1(self, Z, variational_posterior): if isinstance(variational_posterior, variational.SpikeAndSlabPosterior): - return self.psicomp.psicomputations(self.variances, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)[1] + return self.psicomp.psicomputations(self.variances, Z, variational_posterior)[1] else: return self.K(variational_posterior.mean, Z) #the variance, it does nothing @Cache_this(limit=1) def psi2(self, Z, variational_posterior): if isinstance(variational_posterior, variational.SpikeAndSlabPosterior): - return self.psicomp.psicomputations(self.variances, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)[2] + return self.psicomp.psicomputations(self.variances, Z, variational_posterior)[2] else: ZA = Z * self.variances ZAinner = self._ZAinner(variational_posterior, Z) diff --git a/GPy/kern/_src/psi_comp/__init__.py b/GPy/kern/_src/psi_comp/__init__.py index 4c0d373d..17ea2e83 100644 --- a/GPy/kern/_src/psi_comp/__init__.py +++ b/GPy/kern/_src/psi_comp/__init__.py @@ -1,2 +1,19 @@ # Copyright (c) 2012, GPy authors (see AUTHORS.txt). # Licensed under the BSD 3-clause license (see LICENSE.txt) + +from ....core.parameterization.parameter_core import Pickleable + +class PSICOMP(Pickleable): + + def psicomputations(self, variance, Z, variational_posterior): + """ + Compute psi-statistics + """ + pass + + def psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, Z, variational_posterior): + """ + Compute the derivatives of parameters by combing dL_dpsi and dpsi_dparam + """ + pass + diff --git a/GPy/kern/_src/psi_comp/linear_psi_comp.py b/GPy/kern/_src/psi_comp/linear_psi_comp.py index 11c49900..3404c987 100644 --- a/GPy/kern/_src/psi_comp/linear_psi_comp.py +++ b/GPy/kern/_src/psi_comp/linear_psi_comp.py @@ -6,11 +6,12 @@ The package for the Psi statistics computation of the linear kernel for SSGPLVM """ import numpy as np +from . import PSICOMP from GPy.util.caching import Cache_this -class PSICOMP_SSLinear(object): - #@Cache_this(limit=1, ignore_args=(0,)) - def psicomputations(self, variance, Z, mu, S, gamma): +class PSICOMP_SSLinear(PSICOMP): + @Cache_this(limit=1, ignore_args=(0,)) + def psicomputations(self, variance, Z, variational_posterior): """ Compute psi-statistics for ss-linear kernel """ @@ -19,6 +20,9 @@ class PSICOMP_SSLinear(object): # psi0 N # psi1 NxM # psi2 MxM + mu = variational_posterior.mean + S = variational_posterior.variance + gamma = variational_posterior.binary_prob psi0 = np.einsum('q,nq,nq->n',variance,gamma,np.square(mu)+S) psi1 = np.einsum('nq,q,mq,nq->nm',gamma,variance,Z,mu) @@ -30,7 +34,7 @@ class PSICOMP_SSLinear(object): return psi0, psi1, psi2 - #@Cache_this(limit=1, ignore_args=(0,1,2,3)) + @Cache_this(limit=1, ignore_args=(0,1,2,3)) def psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, Z, variational_posterior): mu = variational_posterior.mean S = variational_posterior.variance diff --git a/GPy/kern/_src/psi_comp/ssrbf_psi_comp.py b/GPy/kern/_src/psi_comp/ssrbf_psi_comp.py index 24e04ae4..8a24c60a 100644 --- a/GPy/kern/_src/psi_comp/ssrbf_psi_comp.py +++ b/GPy/kern/_src/psi_comp/ssrbf_psi_comp.py @@ -6,210 +6,200 @@ The package for the psi statistics computation """ import numpy as np +from . import PSICOMP from GPy.util.caching import Cache_this,Cacher -@Cache_this(limit=1) -def psicomputations(variance, lengthscale, Z, mu, S, gamma): - """ - Z - MxQ - mu - NxQ - S - NxQ - gamma - NxQ - """ - # here are the "statistics" for psi0, psi1 and psi2 - # Produced intermediate results: - # _psi1 NxM +class PSICOMP_SSRBF(PSICOMP): - psi0 = np.empty(mu.shape[0]) - psi0[:] = variance - psi1 = _psi1computations(variance, lengthscale, Z, mu, S, gamma) - psi2 = _psi2computations(variance, lengthscale, Z, mu, S, gamma) - return psi0, psi1, psi2 - -def _psi1computations(variance, lengthscale, Z, mu, S, gamma): - """ - Z - MxQ - mu - NxQ - S - NxQ - gamma - NxQ - """ - # here are the "statistics" for psi1 - # Produced intermediate results: - # _psi1 NxM - + @Cache_this(limit=1, ignore_args=(0,)) + def psicomputations(self, variance, lengthscale, Z, variational_posterior): + """ + Z - MxQ + mu - NxQ + S - NxQ + gamma - NxQ + """ + # here are the "statistics" for psi0, psi1 and psi2 + # Produced intermediate results: + # _psi1 NxM + mu = variational_posterior.mean + S = variational_posterior.variance + gamma = variational_posterior.binary_prob + + psi0 = np.empty(mu.shape[0]) + psi0[:] = variance + psi1 = self._psi1computations(variance, lengthscale, Z, mu, S, gamma) + psi2 = self._psi2computations(variance, lengthscale, Z, mu, S, gamma) + return psi0, psi1, psi2 - lengthscale2 = np.square(lengthscale) - - # psi1 - _psi1_denom = S[:, None, :] / lengthscale2 + 1. # Nx1xQ - _psi1_denom_sqrt = np.sqrt(_psi1_denom) #Nx1xQ - _psi1_dist = Z[None, :, :] - mu[:, None, :] # NxMxQ - _psi1_dist_sq = np.square(_psi1_dist) / (lengthscale2 * _psi1_denom) # NxMxQ - _psi1_common = gamma[:,None,:] / (lengthscale2*_psi1_denom*_psi1_denom_sqrt) #Nx1xQ - _psi1_exponent1 = np.log(gamma[:,None,:]) - (_psi1_dist_sq + np.log(_psi1_denom))/2. # NxMxQ - _psi1_exponent2 = np.log(1.-gamma[:,None,:]) - (np.square(Z[None,:,:])/lengthscale2)/2. # NxMxQ - _psi1_exponent_max = np.maximum(_psi1_exponent1,_psi1_exponent2) - _psi1_exponent = _psi1_exponent_max+np.log(np.exp(_psi1_exponent1-_psi1_exponent_max) + np.exp(_psi1_exponent2-_psi1_exponent_max)) #NxMxQ - _psi1_exp_sum = _psi1_exponent.sum(axis=-1) #NxM - _psi1 = variance * np.exp(_psi1_exp_sum) # NxM - - return _psi1 - -def _psi2computations(variance, lengthscale, Z, mu, S, gamma): - """ - Z - MxQ - mu - NxQ - S - NxQ - gamma - NxQ - """ - # here are the "statistics" for psi2 - # Produced intermediate results: - # _psi2 MxM + def _psi1computations(self, variance, lengthscale, Z, mu, S, gamma): + """ + Z - MxQ + mu - NxQ + S - NxQ + gamma - NxQ + """ + # here are the "statistics" for psi1 + # Produced intermediate results: + # _psi1 NxM - lengthscale2 = np.square(lengthscale) + lengthscale2 = np.square(lengthscale) - _psi2_Zhat = 0.5 * (Z[:, None, :] + Z[None, :, :]) # M,M,Q - _psi2_Zdist = 0.5 * (Z[:, None, :] - Z[None, :, :]) # M,M,Q - _psi2_Zdist_sq = np.square(_psi2_Zdist / lengthscale) # M,M,Q - _psi2_Z_sq_sum = (np.square(Z[:,None,:])+np.square(Z[None,:,:]))/lengthscale2 # MxMxQ - - # psi2 - _psi2_denom = 2.*S[:, None, None, :] / lengthscale2 + 1. # Nx1x1xQ - _psi2_denom_sqrt = np.sqrt(_psi2_denom) - _psi2_mudist = mu[:,None,None,:]-_psi2_Zhat #N,M,M,Q - _psi2_mudist_sq = np.square(_psi2_mudist)/(lengthscale2*_psi2_denom) - _psi2_common = gamma[:,None,None,:]/(lengthscale2 * _psi2_denom * _psi2_denom_sqrt) # Nx1x1xQ - _psi2_exponent1 = -_psi2_Zdist_sq -_psi2_mudist_sq -0.5*np.log(_psi2_denom)+np.log(gamma[:,None,None,:]) #N,M,M,Q - _psi2_exponent2 = np.log(1.-gamma[:,None,None,:]) - 0.5*(_psi2_Z_sq_sum) # NxMxMxQ - _psi2_exponent_max = np.maximum(_psi2_exponent1, _psi2_exponent2) - _psi2_exponent = _psi2_exponent_max+np.log(np.exp(_psi2_exponent1-_psi2_exponent_max) + np.exp(_psi2_exponent2-_psi2_exponent_max)) - _psi2_exp_sum = _psi2_exponent.sum(axis=-1) #NxM - _psi2 = variance*variance * (np.exp(_psi2_exp_sum).sum(axis=0)) # MxM - - return _psi2 - -def _psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior): - ARD = (len(lengthscale)!=1) + # psi1 + _psi1_denom = S[:, None, :] / lengthscale2 + 1. # Nx1xQ + _psi1_denom_sqrt = np.sqrt(_psi1_denom) #Nx1xQ + _psi1_dist = Z[None, :, :] - mu[:, None, :] # NxMxQ + _psi1_dist_sq = np.square(_psi1_dist) / (lengthscale2 * _psi1_denom) # NxMxQ + _psi1_common = gamma[:,None,:] / (lengthscale2*_psi1_denom*_psi1_denom_sqrt) #Nx1xQ + _psi1_exponent1 = np.log(gamma[:,None,:]) - (_psi1_dist_sq + np.log(_psi1_denom))/2. # NxMxQ + _psi1_exponent2 = np.log(1.-gamma[:,None,:]) - (np.square(Z[None,:,:])/lengthscale2)/2. # NxMxQ + _psi1_exponent_max = np.maximum(_psi1_exponent1,_psi1_exponent2) + _psi1_exponent = _psi1_exponent_max+np.log(np.exp(_psi1_exponent1-_psi1_exponent_max) + np.exp(_psi1_exponent2-_psi1_exponent_max)) #NxMxQ + _psi1_exp_sum = _psi1_exponent.sum(axis=-1) #NxM + _psi1 = variance * np.exp(_psi1_exp_sum) # NxM - dvar_psi1, dl_psi1, dZ_psi1, dmu_psi1, dS_psi1, dgamma_psi1 = _psi1compDer(dL_dpsi1, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob) - dvar_psi2, dl_psi2, dZ_psi2, dmu_psi2, dS_psi2, dgamma_psi2 = _psi2compDer(dL_dpsi2, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob) - - dL_dvar = np.sum(dL_dpsi0) + dvar_psi1 + dvar_psi2 + return _psi1 - dL_dlengscale = dl_psi1 + dl_psi2 - if not ARD: - dL_dlengscale = dL_dlengscale.sum() - - dL_dgamma = dgamma_psi1 + dgamma_psi2 - dL_dmu = dmu_psi1 + dmu_psi2 - dL_dS = dS_psi1 + dS_psi2 - dL_dZ = dZ_psi1 + dZ_psi2 + def _psi2computations(self, variance, lengthscale, Z, mu, S, gamma): + """ + Z - MxQ + mu - NxQ + S - NxQ + gamma - NxQ + """ + # here are the "statistics" for psi2 + # Produced intermediate results: + # _psi2 MxM + + lengthscale2 = np.square(lengthscale) + + _psi2_Zhat = 0.5 * (Z[:, None, :] + Z[None, :, :]) # M,M,Q + _psi2_Zdist = 0.5 * (Z[:, None, :] - Z[None, :, :]) # M,M,Q + _psi2_Zdist_sq = np.square(_psi2_Zdist / lengthscale) # M,M,Q + _psi2_Z_sq_sum = (np.square(Z[:,None,:])+np.square(Z[None,:,:]))/lengthscale2 # MxMxQ - return dL_dvar, dL_dlengscale, dL_dZ, dL_dmu, dL_dS, dL_dgamma - -def _psi1compDer(dL_dpsi1, variance, lengthscale, Z, mu, S, gamma): - """ - dL_dpsi1 - NxM - Z - MxQ - mu - NxQ - S - NxQ - gamma - NxQ - """ - # here are the "statistics" for psi1 - # Produced intermediate results: dL_dparams w.r.t. psi1 - # _dL_dvariance 1 - # _dL_dlengthscale Q - # _dL_dZ MxQ - # _dL_dgamma NxQ - # _dL_dmu NxQ - # _dL_dS NxQ + # psi2 + _psi2_denom = 2.*S[:, None, None, :] / lengthscale2 + 1. # Nx1x1xQ + _psi2_denom_sqrt = np.sqrt(_psi2_denom) + _psi2_mudist = mu[:,None,None,:]-_psi2_Zhat #N,M,M,Q + _psi2_mudist_sq = np.square(_psi2_mudist)/(lengthscale2*_psi2_denom) + _psi2_common = gamma[:,None,None,:]/(lengthscale2 * _psi2_denom * _psi2_denom_sqrt) # Nx1x1xQ + _psi2_exponent1 = -_psi2_Zdist_sq -_psi2_mudist_sq -0.5*np.log(_psi2_denom)+np.log(gamma[:,None,None,:]) #N,M,M,Q + _psi2_exponent2 = np.log(1.-gamma[:,None,None,:]) - 0.5*(_psi2_Z_sq_sum) # NxMxMxQ + _psi2_exponent_max = np.maximum(_psi2_exponent1, _psi2_exponent2) + _psi2_exponent = _psi2_exponent_max+np.log(np.exp(_psi2_exponent1-_psi2_exponent_max) + np.exp(_psi2_exponent2-_psi2_exponent_max)) + _psi2_exp_sum = _psi2_exponent.sum(axis=-1) #NxM + _psi2 = variance*variance * (np.exp(_psi2_exp_sum).sum(axis=0)) # MxM - lengthscale2 = np.square(lengthscale) - - # psi1 - _psi1_denom = S / lengthscale2 + 1. # NxQ - _psi1_denom_sqrt = np.sqrt(_psi1_denom) #NxQ - _psi1_dist = Z[None, :, :] - mu[:, None, :] # NxMxQ - _psi1_dist_sq = np.square(_psi1_dist) / (lengthscale2 * _psi1_denom[:,None,:]) # NxMxQ - _psi1_common = gamma / (lengthscale2*_psi1_denom*_psi1_denom_sqrt) #NxQ - _psi1_exponent1 = np.log(gamma[:,None,:]) -0.5 * (_psi1_dist_sq + np.log(_psi1_denom[:, None,:])) # NxMxQ - _psi1_exponent2 = np.log(1.-gamma[:,None,:]) -0.5 * (np.square(Z[None,:,:])/lengthscale2) # NxMxQ - _psi1_exponent_max = np.maximum(_psi1_exponent1,_psi1_exponent2) - _psi1_exponent = _psi1_exponent_max+np.log(np.exp(_psi1_exponent1-_psi1_exponent_max) + np.exp(_psi1_exponent2-_psi1_exponent_max)) #NxMxQ - _psi1_exp_sum = _psi1_exponent.sum(axis=-1) #NxM - _psi1_exp_dist_sq = np.exp(-0.5*_psi1_dist_sq) # NxMxQ - _psi1_exp_Z = np.exp(-0.5*np.square(Z[None,:,:])/lengthscale2) # 1xMxQ - _psi1_q = variance * np.exp(_psi1_exp_sum[:,:,None] - _psi1_exponent) # NxMxQ - _psi1 = variance * np.exp(_psi1_exp_sum) # NxM - _dL_dvariance = np.einsum('nm,nm->',dL_dpsi1, _psi1)/variance # 1 - _dL_dgamma = np.einsum('nm,nmq,nmq->nq',dL_dpsi1, _psi1_q, (_psi1_exp_dist_sq/_psi1_denom_sqrt[:,None,:]-_psi1_exp_Z)) # NxQ - _dL_dmu = np.einsum('nm, nmq, nmq, nmq, nq->nq',dL_dpsi1,_psi1_q,_psi1_exp_dist_sq,_psi1_dist,_psi1_common) # NxQ - _dL_dS = np.einsum('nm,nmq,nmq,nq,nmq->nq',dL_dpsi1,_psi1_q,_psi1_exp_dist_sq,_psi1_common,(_psi1_dist_sq-1.))/2. # NxQ - _dL_dZ = np.einsum('nm,nmq,nmq->mq',dL_dpsi1,_psi1_q, (- _psi1_common[:,None,:] * _psi1_dist * _psi1_exp_dist_sq - (1-gamma[:,None,:])/lengthscale2*Z[None,:,:]*_psi1_exp_Z)) - _dL_dlengthscale = lengthscale* np.einsum('nm,nmq,nmq->q',dL_dpsi1,_psi1_q,(_psi1_common[:,None,:]*(S[:,None,:]/lengthscale2+_psi1_dist_sq)*_psi1_exp_dist_sq + (1-gamma[:,None,:])*np.square(Z[None,:,:]/lengthscale2)*_psi1_exp_Z)) + return _psi2 -# _dpsi1_dmu = _psi1_q * (_psi1_exp_dist_sq * _psi1_dist * _psi1_common) # NxMxQ -# _dpsi1_dS = _psi1_q * (_psi1_exp_dist_sq * _psi1_common * 0.5 * (_psi1_dist_sq - 1.)) # NxMxQ -# _dpsi1_dZ = _psi1_q * (- _psi1_common * _psi1_dist * _psi1_exp_dist_sq - (1-gamma[:,None,:])/lengthscale2*Z[None,:,:]*_psi1_exp_Z) # NxMxQ -# _dpsi1_dlengthscale = 2.*lengthscale*_psi1_q * (0.5*_psi1_common*(S[:,None,:]/lengthscale2+_psi1_dist_sq)*_psi1_exp_dist_sq + 0.5*(1-gamma[:,None,:])*np.square(Z[None,:,:]/lengthscale2)*_psi1_exp_Z) # NxMxQ - - return _dL_dvariance, _dL_dlengthscale, _dL_dZ, _dL_dmu, _dL_dS, _dL_dgamma - -def _psi2compDer(dL_dpsi2, variance, lengthscale, Z, mu, S, gamma): - """ - Z - MxQ - mu - NxQ - S - NxQ - gamma - NxQ - dL_dpsi2 - MxM - """ - # here are the "statistics" for psi2 - # Produced the derivatives w.r.t. psi2: - # _dL_dvariance 1 - # _dL_dlengthscale Q - # _dL_dZ MxQ - # _dL_dgamma NxQ - # _dL_dmu NxQ - # _dL_dS NxQ + @Cache_this(limit=1, ignore_args=(0,1,2,3)) + def psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior): + ARD = (len(lengthscale)!=1) + + dvar_psi1, dl_psi1, dZ_psi1, dmu_psi1, dS_psi1, dgamma_psi1 = self._psi1compDer(dL_dpsi1, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob) + dvar_psi2, dl_psi2, dZ_psi2, dmu_psi2, dS_psi2, dgamma_psi2 = self._psi2compDer(dL_dpsi2, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob) - lengthscale2 = np.square(lengthscale) + dL_dvar = np.sum(dL_dpsi0) + dvar_psi1 + dvar_psi2 + + dL_dlengscale = dl_psi1 + dl_psi2 + if not ARD: + dL_dlengscale = dL_dlengscale.sum() - _psi2_Zhat = 0.5 * (Z[:, None, :] + Z[None, :, :]) # M,M,Q - _psi2_Zdist = 0.5 * (Z[:, None, :] - Z[None, :, :]) # M,M,Q - _psi2_Zdist_sq = np.square(_psi2_Zdist / lengthscale) # M,M,Q - _psi2_Z_sq_sum = (np.square(Z[:,None,:])+np.square(Z[None,:,:]))/lengthscale2 # MxMxQ - - # psi2 - _psi2_denom = 2.*S / lengthscale2 + 1. # NxQ - _psi2_denom_sqrt = np.sqrt(_psi2_denom) - _psi2_mudist = mu[:,None,None,:]-_psi2_Zhat #N,M,M,Q - _psi2_mudist_sq = np.square(_psi2_mudist)/(lengthscale2*_psi2_denom[:,None,None,:]) - _psi2_common = gamma/(lengthscale2 * _psi2_denom * _psi2_denom_sqrt) # NxQ - _psi2_exponent1 = -_psi2_Zdist_sq -_psi2_mudist_sq -0.5*np.log(_psi2_denom[:,None,None,:])+np.log(gamma[:,None,None,:]) #N,M,M,Q - _psi2_exponent2 = np.log(1.-gamma[:,None,None,:]) - 0.5*(_psi2_Z_sq_sum) # NxMxMxQ - _psi2_exponent_max = np.maximum(_psi2_exponent1, _psi2_exponent2) - _psi2_exponent = _psi2_exponent_max+np.log(np.exp(_psi2_exponent1-_psi2_exponent_max) + np.exp(_psi2_exponent2-_psi2_exponent_max)) - _psi2_exp_sum = _psi2_exponent.sum(axis=-1) #NxM - _psi2_q = variance*variance * np.exp(_psi2_exp_sum[:,:,:,None]-_psi2_exponent) # NxMxMxQ - _psi2_exp_dist_sq = np.exp(-_psi2_Zdist_sq -_psi2_mudist_sq) # NxMxMxQ - _psi2_exp_Z = np.exp(-0.5*_psi2_Z_sq_sum) # MxMxQ - _psi2 = variance*variance * (np.exp(_psi2_exp_sum).sum(axis=0)) # MxM - _dL_dvariance = np.einsum('mo,mo->',dL_dpsi2,_psi2)*2./variance - _dL_dgamma = np.einsum('mo,nmoq,nmoq->nq',dL_dpsi2,_psi2_q,(_psi2_exp_dist_sq/_psi2_denom_sqrt[:,None,None,:] - _psi2_exp_Z)) - _dL_dmu = -2.*np.einsum('mo,nmoq,nq,nmoq,nmoq->nq',dL_dpsi2,_psi2_q,_psi2_common,_psi2_mudist,_psi2_exp_dist_sq) - _dL_dS = np.einsum('mo,nmoq,nq,nmoq,nmoq->nq',dL_dpsi2,_psi2_q, _psi2_common, (2.*_psi2_mudist_sq-1.), _psi2_exp_dist_sq) - _dL_dZ = 2.*np.einsum('mo,nmoq,nmoq->mq',dL_dpsi2,_psi2_q,(_psi2_common[:,None,None,:]*(-_psi2_Zdist*_psi2_denom[:,None,None,:]+_psi2_mudist)*_psi2_exp_dist_sq - (1-gamma[:,None,None,:])*Z[:,None,:]/lengthscale2*_psi2_exp_Z)) -# print _psi2_common[:,None,None,:]*(S[:,None,None,:]/lengthscale2+_psi2_Zdist_sq*_psi2_denom[:,None,None,:]+_psi2_mudist_sq)*_psi2_exp_dist_sq #+(1-gamma[:,None,None,:])*_psi2_Z_sq_sum*0.5/lengthscale2*_psi2_exp_Z) - _dL_dlengthscale = 2.*lengthscale* np.einsum('mo,nmoq,nmoq->q',dL_dpsi2,_psi2_q,(_psi2_common[:,None,None,:]*(S[:,None,None,:]/lengthscale2+_psi2_Zdist_sq*_psi2_denom[:,None,None,:]+_psi2_mudist_sq)*_psi2_exp_dist_sq+(1-gamma[:,None,None,:])*_psi2_Z_sq_sum*0.5/lengthscale2*_psi2_exp_Z)) + dL_dgamma = dgamma_psi1 + dgamma_psi2 + dL_dmu = dmu_psi1 + dmu_psi2 + dL_dS = dS_psi1 + dS_psi2 + dL_dZ = dZ_psi1 + dZ_psi2 + + return dL_dvar, dL_dlengscale, dL_dZ, dL_dmu, dL_dS, dL_dgamma + def _psi1compDer(self, dL_dpsi1, variance, lengthscale, Z, mu, S, gamma): + """ + dL_dpsi1 - NxM + Z - MxQ + mu - NxQ + S - NxQ + gamma - NxQ + """ + # here are the "statistics" for psi1 + # Produced intermediate results: dL_dparams w.r.t. psi1 + # _dL_dvariance 1 + # _dL_dlengthscale Q + # _dL_dZ MxQ + # _dL_dgamma NxQ + # _dL_dmu NxQ + # _dL_dS NxQ + + lengthscale2 = np.square(lengthscale) -# _dpsi2_dvariance = 2. * _psi2/variance # NxMxM -# _dpsi2_dgamma = _psi2_q * (_psi2_exp_dist_sq/_psi2_denom_sqrt - _psi2_exp_Z) # NxMxMxQ -# _dpsi2_dmu = _psi2_q * (-2.*_psi2_common*_psi2_mudist * _psi2_exp_dist_sq) # NxMxMxQ -# _dpsi2_dS = _psi2_q * (_psi2_common * (2.*_psi2_mudist_sq - 1.) * _psi2_exp_dist_sq) # NxMxMxQ -# _dpsi2_dZ = 2.*_psi2_q * (_psi2_common*(-_psi2_Zdist*_psi2_denom+_psi2_mudist)*_psi2_exp_dist_sq - (1-gamma[:,None,None,:])*Z[:,None,:]/lengthscale2*_psi2_exp_Z) # NxMxMxQ -# _dpsi2_dlengthscale = 2.*lengthscale* _psi2_q * (_psi2_common*(S[:,None,None,:]/lengthscale2+_psi2_Zdist_sq*_psi2_denom+_psi2_mudist_sq)*_psi2_exp_dist_sq+(1-gamma[:,None,None,:])*_psi2_Z_sq_sum*0.5/lengthscale2*_psi2_exp_Z) # NxMxMxQ - - return _dL_dvariance, _dL_dlengthscale, _dL_dZ, _dL_dmu, _dL_dS, _dL_dgamma - -psiDerivativecomputations = Cacher(_psiDerivativecomputations, limit=1, ignore_args=(0,1,2,)) + # psi1 + _psi1_denom = S / lengthscale2 + 1. # NxQ + _psi1_denom_sqrt = np.sqrt(_psi1_denom) #NxQ + _psi1_dist = Z[None, :, :] - mu[:, None, :] # NxMxQ + _psi1_dist_sq = np.square(_psi1_dist) / (lengthscale2 * _psi1_denom[:,None,:]) # NxMxQ + _psi1_common = gamma / (lengthscale2*_psi1_denom*_psi1_denom_sqrt) #NxQ + _psi1_exponent1 = np.log(gamma[:,None,:]) -0.5 * (_psi1_dist_sq + np.log(_psi1_denom[:, None,:])) # NxMxQ + _psi1_exponent2 = np.log(1.-gamma[:,None,:]) -0.5 * (np.square(Z[None,:,:])/lengthscale2) # NxMxQ + _psi1_exponent_max = np.maximum(_psi1_exponent1,_psi1_exponent2) + _psi1_exponent = _psi1_exponent_max+np.log(np.exp(_psi1_exponent1-_psi1_exponent_max) + np.exp(_psi1_exponent2-_psi1_exponent_max)) #NxMxQ + _psi1_exp_sum = _psi1_exponent.sum(axis=-1) #NxM + _psi1_exp_dist_sq = np.exp(-0.5*_psi1_dist_sq) # NxMxQ + _psi1_exp_Z = np.exp(-0.5*np.square(Z[None,:,:])/lengthscale2) # 1xMxQ + _psi1_q = variance * np.exp(_psi1_exp_sum[:,:,None] - _psi1_exponent) # NxMxQ + _psi1 = variance * np.exp(_psi1_exp_sum) # NxM + _dL_dvariance = np.einsum('nm,nm->',dL_dpsi1, _psi1)/variance # 1 + _dL_dgamma = np.einsum('nm,nmq,nmq->nq',dL_dpsi1, _psi1_q, (_psi1_exp_dist_sq/_psi1_denom_sqrt[:,None,:]-_psi1_exp_Z)) # NxQ + _dL_dmu = np.einsum('nm, nmq, nmq, nmq, nq->nq',dL_dpsi1,_psi1_q,_psi1_exp_dist_sq,_psi1_dist,_psi1_common) # NxQ + _dL_dS = np.einsum('nm,nmq,nmq,nq,nmq->nq',dL_dpsi1,_psi1_q,_psi1_exp_dist_sq,_psi1_common,(_psi1_dist_sq-1.))/2. # NxQ + _dL_dZ = np.einsum('nm,nmq,nmq->mq',dL_dpsi1,_psi1_q, (- _psi1_common[:,None,:] * _psi1_dist * _psi1_exp_dist_sq - (1-gamma[:,None,:])/lengthscale2*Z[None,:,:]*_psi1_exp_Z)) + _dL_dlengthscale = lengthscale* np.einsum('nm,nmq,nmq->q',dL_dpsi1,_psi1_q,(_psi1_common[:,None,:]*(S[:,None,:]/lengthscale2+_psi1_dist_sq)*_psi1_exp_dist_sq + (1-gamma[:,None,:])*np.square(Z[None,:,:]/lengthscale2)*_psi1_exp_Z)) + + return _dL_dvariance, _dL_dlengthscale, _dL_dZ, _dL_dmu, _dL_dS, _dL_dgamma + + def _psi2compDer(self, dL_dpsi2, variance, lengthscale, Z, mu, S, gamma): + """ + Z - MxQ + mu - NxQ + S - NxQ + gamma - NxQ + dL_dpsi2 - MxM + """ + # here are the "statistics" for psi2 + # Produced the derivatives w.r.t. psi2: + # _dL_dvariance 1 + # _dL_dlengthscale Q + # _dL_dZ MxQ + # _dL_dgamma NxQ + # _dL_dmu NxQ + # _dL_dS NxQ + + lengthscale2 = np.square(lengthscale) + + _psi2_Zhat = 0.5 * (Z[:, None, :] + Z[None, :, :]) # M,M,Q + _psi2_Zdist = 0.5 * (Z[:, None, :] - Z[None, :, :]) # M,M,Q + _psi2_Zdist_sq = np.square(_psi2_Zdist / lengthscale) # M,M,Q + _psi2_Z_sq_sum = (np.square(Z[:,None,:])+np.square(Z[None,:,:]))/lengthscale2 # MxMxQ + + # psi2 + _psi2_denom = 2.*S / lengthscale2 + 1. # NxQ + _psi2_denom_sqrt = np.sqrt(_psi2_denom) + _psi2_mudist = mu[:,None,None,:]-_psi2_Zhat #N,M,M,Q + _psi2_mudist_sq = np.square(_psi2_mudist)/(lengthscale2*_psi2_denom[:,None,None,:]) + _psi2_common = gamma/(lengthscale2 * _psi2_denom * _psi2_denom_sqrt) # NxQ + _psi2_exponent1 = -_psi2_Zdist_sq -_psi2_mudist_sq -0.5*np.log(_psi2_denom[:,None,None,:])+np.log(gamma[:,None,None,:]) #N,M,M,Q + _psi2_exponent2 = np.log(1.-gamma[:,None,None,:]) - 0.5*(_psi2_Z_sq_sum) # NxMxMxQ + _psi2_exponent_max = np.maximum(_psi2_exponent1, _psi2_exponent2) + _psi2_exponent = _psi2_exponent_max+np.log(np.exp(_psi2_exponent1-_psi2_exponent_max) + np.exp(_psi2_exponent2-_psi2_exponent_max)) + _psi2_exp_sum = _psi2_exponent.sum(axis=-1) #NxM + _psi2_q = variance*variance * np.exp(_psi2_exp_sum[:,:,:,None]-_psi2_exponent) # NxMxMxQ + _psi2_exp_dist_sq = np.exp(-_psi2_Zdist_sq -_psi2_mudist_sq) # NxMxMxQ + _psi2_exp_Z = np.exp(-0.5*_psi2_Z_sq_sum) # MxMxQ + _psi2 = variance*variance * (np.exp(_psi2_exp_sum).sum(axis=0)) # MxM + _dL_dvariance = np.einsum('mo,mo->',dL_dpsi2,_psi2)*2./variance + _dL_dgamma = np.einsum('mo,nmoq,nmoq->nq',dL_dpsi2,_psi2_q,(_psi2_exp_dist_sq/_psi2_denom_sqrt[:,None,None,:] - _psi2_exp_Z)) + _dL_dmu = -2.*np.einsum('mo,nmoq,nq,nmoq,nmoq->nq',dL_dpsi2,_psi2_q,_psi2_common,_psi2_mudist,_psi2_exp_dist_sq) + _dL_dS = np.einsum('mo,nmoq,nq,nmoq,nmoq->nq',dL_dpsi2,_psi2_q, _psi2_common, (2.*_psi2_mudist_sq-1.), _psi2_exp_dist_sq) + _dL_dZ = 2.*np.einsum('mo,nmoq,nmoq->mq',dL_dpsi2,_psi2_q,(_psi2_common[:,None,None,:]*(-_psi2_Zdist*_psi2_denom[:,None,None,:]+_psi2_mudist)*_psi2_exp_dist_sq - (1-gamma[:,None,None,:])*Z[:,None,:]/lengthscale2*_psi2_exp_Z)) + _dL_dlengthscale = 2.*lengthscale* np.einsum('mo,nmoq,nmoq->q',dL_dpsi2,_psi2_q,(_psi2_common[:,None,None,:]*(S[:,None,None,:]/lengthscale2+_psi2_Zdist_sq*_psi2_denom[:,None,None,:]+_psi2_mudist_sq)*_psi2_exp_dist_sq+(1-gamma[:,None,None,:])*_psi2_Z_sq_sum*0.5/lengthscale2*_psi2_exp_Z)) + + return _dL_dvariance, _dL_dlengthscale, _dL_dZ, _dL_dmu, _dL_dS, _dL_dgamma diff --git a/GPy/kern/_src/rbf.py b/GPy/kern/_src/rbf.py index 51db1879..c75e4341 100644 --- a/GPy/kern/_src/rbf.py +++ b/GPy/kern/_src/rbf.py @@ -31,7 +31,7 @@ class RBF(Stationary): if self.useGPU: self.psicomp = ssrbf_psi_gpucomp.PSICOMP_SSRBF() else: - self.psicomp = ssrbf_psi_comp + self.psicomp = ssrbf_psi_comp.PSICOMP_SSRBF() def K_of_r(self, r): return self.variance * np.exp(-0.5 * r**2) @@ -48,7 +48,7 @@ class RBF(Stationary): if self.useGPU: return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)[0] else: - return ssrbf_psi_comp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)[0] + return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior)[0] else: return self.Kdiag(variational_posterior.mean) @@ -57,7 +57,7 @@ class RBF(Stationary): if self.useGPU: return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)[1] else: - return ssrbf_psi_comp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)[1] + return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior)[1] else: _, _, _, psi1 = self._psi1computations(Z, variational_posterior) return psi1 @@ -67,7 +67,7 @@ class RBF(Stationary): if self.useGPU: return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)[2] else: - return ssrbf_psi_comp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)[2] + return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior)[2] else: _, _, _, _, psi2 = self._psi2computations(Z, variational_posterior) return psi2 @@ -78,30 +78,9 @@ class RBF(Stationary): if self.useGPU: self.psicomp.update_gradients_expectations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior) else: -# dL_dvar, dL_dlengscale, dL_dZ, dL_dgamma, dL_dmu, dL_dS = ssrbf_psi_comp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior) - dL_dvar, dL_dlengscale, _, _, _, _ = ssrbf_psi_comp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior) + dL_dvar, dL_dlengscale, _, _, _, _ = self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior) self.variance.gradient = dL_dvar self.lengthscale.gradient = dL_dlengscale - -# _, _dpsi1_dvariance, _, _, _, _, _dpsi1_dlengthscale = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob) -# _, _dpsi2_dvariance, _, _, _, _, _dpsi2_dlengthscale = ssrbf_psi_comp._psi2computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob) -# -# #contributions from psi0: -# self.variance.gradient = np.sum(dL_dpsi0) -# -# #from psi1 -# self.variance.gradient += np.sum(dL_dpsi1 * _dpsi1_dvariance) -# if self.ARD: -# self.lengthscale.gradient = (dL_dpsi1[:,:,None]*_dpsi1_dlengthscale).reshape(-1,self.input_dim).sum(axis=0) -# else: -# self.lengthscale.gradient = (dL_dpsi1[:,:,None]*_dpsi1_dlengthscale).sum() -# -# #from psi2 -# self.variance.gradient += (dL_dpsi2 * _dpsi2_dvariance).sum() -# if self.ARD: -# self.lengthscale.gradient += (dL_dpsi2[:,:,:,None] * _dpsi2_dlengthscale).reshape(-1,self.input_dim).sum(axis=0) -# else: -# self.lengthscale.gradient += (dL_dpsi2[:,:,:,None] * _dpsi2_dlengthscale).sum() elif isinstance(variational_posterior, variational.NormalPosterior): l2 = self.lengthscale**2 @@ -140,20 +119,9 @@ class RBF(Stationary): if self.useGPU: return self.psicomp.gradients_Z_expectations(dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior) else: - _, _, dL_dZ, _, _, _ = ssrbf_psi_comp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior) + _, _, dL_dZ, _, _, _ = self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior) return dL_dZ -# _, _, _, _, _, _dpsi1_dZ, _ = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob) -# _, _, _, _, _, _dpsi2_dZ, _ = ssrbf_psi_comp._psi2computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob) -# -# #psi1 -# grad = (dL_dpsi1[:, :, None] * _dpsi1_dZ).sum(axis=0) -# -# #psi2 -# grad += (dL_dpsi2[:, :, :, None] * _dpsi2_dZ).sum(axis=0).sum(axis=1) -# -# return grad - elif isinstance(variational_posterior, variational.NormalPosterior): l2 = self.lengthscale **2 @@ -179,29 +147,9 @@ class RBF(Stationary): if self.useGPU: return self.psicomp.gradients_qX_expectations(dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior) else: - _, _, _, dL_dmu, dL_dS, dL_dgamma = ssrbf_psi_comp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior) + _, _, _, dL_dmu, dL_dS, dL_dgamma = self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior) return dL_dmu, dL_dS, dL_dgamma -# ndata = variational_posterior.mean.shape[0] -# -# _, _, _dpsi1_dgamma, _dpsi1_dmu, _dpsi1_dS, _, _ = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob) -# _, _, _dpsi2_dgamma, _dpsi2_dmu, _dpsi2_dS, _, _ = ssrbf_psi_comp._psi2computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob) -# -# #psi1 -# grad_mu = (dL_dpsi1[:, :, None] * _dpsi1_dmu).sum(axis=1) -# grad_S = (dL_dpsi1[:, :, None] * _dpsi1_dS).sum(axis=1) -# grad_gamma = (dL_dpsi1[:,:,None] * _dpsi1_dgamma).sum(axis=1) -# -# #psi2 -# grad_mu += (dL_dpsi2[:, :, :, None] * _dpsi2_dmu).reshape(ndata,-1,self.input_dim).sum(axis=1) -# grad_S += (dL_dpsi2[:, :, :, None] * _dpsi2_dS).reshape(ndata,-1,self.input_dim).sum(axis=1) -# grad_gamma += (dL_dpsi2[:,:,:, None] * _dpsi2_dgamma).reshape(ndata,-1,self.input_dim).sum(axis=1) -# -# if self.group_spike_prob: -# grad_gamma[:] = grad_gamma.mean(axis=0) -# -# return grad_mu, grad_S, grad_gamma - elif isinstance(variational_posterior, variational.NormalPosterior): l2 = self.lengthscale **2 diff --git a/GPy/models/ss_gplvm.py b/GPy/models/ss_gplvm.py index 27d5158f..8581bf7c 100644 --- a/GPy/models/ss_gplvm.py +++ b/GPy/models/ss_gplvm.py @@ -59,20 +59,15 @@ class SSGPLVM(SparseGP): pi = np.empty((input_dim)) pi[:] = 0.5 -# if mpi_comm != None: -# mpi_comm.Bcast(X, root=0) -# mpi_comm.Bcast(fracs, root=0) -# mpi_comm.Bcast(X_variance, root=0) -# mpi_comm.Bcast(gamma, root=0) -# mpi_comm.Bcast(Z, root=0) -# mpi_comm.Bcast(pi, root=0) - if likelihood is None: likelihood = Gaussian() if kernel is None: kernel = kern.RBF(input_dim, lengthscale=fracs, ARD=True) # + kern.white(input_dim) kernel.set_for_SpikeAndSlab() + + if inference_method is None: + inference_method = VarDTC_minibatch(mpi_comm=mpi_comm) self.variational_prior = SpikeAndSlabPrior(pi=pi) # the prior probability of the latent binary variable b @@ -131,16 +126,14 @@ class SSGPLVM(SparseGP): def __getstate__(self): dc = super(SSGPLVM, self).__getstate__() - del dc['mpi_comm'] - del dc['Y_local'] - del dc['X_local'] + dc['mpi_comm'] = None + if self.mpi_comm != None: + del dc['Y_local'] + del dc['X_local'] + del dc['Y_range'] return dc def __setstate__(self, state): - state['mpi_comm'] = None - Y_range = state['Y_range'] - state['Y_local'] = state['Y'][Y_range[0]:Y_range[1]] - state['X_local'] = state['X'][Y_range[0]:Y_range[1]] return super(SSGPLVM, self).__setstate__(state) def _grads(self, x):