diff --git a/GPy/kern/_src/rbf.py b/GPy/kern/_src/rbf.py index cf5ea0c4..7bf0adeb 100644 --- a/GPy/kern/_src/rbf.py +++ b/GPy/kern/_src/rbf.py @@ -7,6 +7,8 @@ from scipy import weave from ...util.misc import param_to_array from stationary import Stationary from GPy.util.caching import Cache_this +from ...core.parameterization import variational +from rbf_psi_comp import ssrbf_psi_comp class RBF(Stationary): """ @@ -36,14 +38,38 @@ class RBF(Stationary): return self.Kdiag(variational_posterior.mean) def psi1(self, Z, variational_posterior): - _, _, _, psi1 = self._psi1computations(Z, variational_posterior) + if isinstance(variational_posterior, variational.SpikeAndSlabPosterior): + psi1, _, _, _, _, _, _ = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob) + else: + _, _, _, psi1 = self._psi1computations(Z, variational_posterior) return psi1 def psi2(self, Z, variational_posterior): - _, _, _, _, _, psi2 = self._psi2computations(Z, variational_posterior) + if isinstance(variational_posterior, variational.SpikeAndSlabPosterior): + psi2, _, _, _, _, _, _ = ssrbf_psi_comp._psi2computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob) + else: + _, _, _, _, _, psi2 = self._psi2computations(Z, variational_posterior) return psi2 def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): + # Spike-and-Slab GPLVM + if isinstance(variational_posterior, variational.SpikeAndSlabPosterior): + _, _dpsi1_dvariance, _, _, _, _, _dpsi1_dlengthscale = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob) + _, _dpsi2_dvariance, _, _, _, _, _dpsi2_dlengthscale = ssrbf_psi_comp._psi2computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob) + + #contributions from psi0: + self.variance.gradient = np.sum(dL_dpsi0) + + #from psi1 + self.variance.gradient += np.sum(dL_dpsi1 * _dpsi1_dvariance) + self.lengthscale.gradient = (dL_dpsi1[:,:,None]*_dpsi1_dlengthscale).reshape(-1,self.input_dim).sum(axis=0) + + + #from psi2 + self.variance.gradient += (dL_dpsi2 * _dpsi2_dvariance).sum() + self.lengthscale.gradient += (dL_dpsi2[:,:,:,None] * _dpsi2_dlengthscale).reshape(-1,self.input_dim).sum(axis=0) + return + l2 = self.lengthscale **2 #contributions from psi0: @@ -77,6 +103,19 @@ class RBF(Stationary): self.variance.gradient += 2.*np.sum(dL_dpsi2 * psi2)/self.variance def gradients_Z_expectations(self, dL_dpsi1, dL_dpsi2, Z, variational_posterior): + # Spike-and-Slab GPLVM + if isinstance(variational_posterior, variational.SpikeAndSlabPosterior): + _, _, _, _, _, _dpsi1_dZ, _ = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob) + _, _, _, _, _, _dpsi2_dZ, _ = ssrbf_psi_comp._psi2computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob) + + #psi1 + grad = (dL_dpsi1[:, :, None] * _dpsi1_dZ).sum(axis=0) + + #psi2 + grad += (dL_dpsi2[:, :, :, None] * _dpsi2_dZ).sum(axis=0).sum(axis=1) + + return grad + l2 = self.lengthscale **2 #psi1 @@ -95,6 +134,24 @@ class RBF(Stationary): return grad def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): + # Spike-and-Slab GPLVM + if isinstance(variational_posterior, variational.SpikeAndSlabPosterior): + ndata = variational_posterior.mean.shape[0] + + _, _, _dpsi1_dgamma, _dpsi1_dmu, _dpsi1_dS, _, _ = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob) + _, _, _dpsi2_dgamma, _dpsi2_dmu, _dpsi2_dS, _, _ = ssrbf_psi_comp._psi2computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob) + + #psi1 + grad_mu = (dL_dpsi1[:, :, None] * _dpsi1_dmu).sum(axis=1) + grad_S = (dL_dpsi1[:, :, None] * _dpsi1_dS).sum(axis=1) + grad_gamma = (dL_dpsi1[:,:,None] * _dpsi1_dgamma).sum(axis=1) + #psi2 + grad_mu += (dL_dpsi2[:, :, :, None] * _dpsi2_dmu).reshape(ndata,-1,self.input_dim).sum(axis=1) + grad_S += (dL_dpsi2[:, :, :, None] * _dpsi2_dS).reshape(ndata,-1,self.input_dim).sum(axis=1) + grad_gamma += (dL_dpsi2[:,:,:, None] * _dpsi2_dgamma).reshape(ndata,-1,self.input_dim).sum(axis=1) + + return grad_mu, grad_S, grad_gamma + l2 = self.lengthscale **2 #psi1 denom, dist, dist_sq, psi1 = self._psi1computations(Z, variational_posterior) diff --git a/GPy/kern/_src/rbf_psi_comp/__init__.py b/GPy/kern/_src/rbf_psi_comp/__init__.py new file mode 100644 index 00000000..4c0d373d --- /dev/null +++ b/GPy/kern/_src/rbf_psi_comp/__init__.py @@ -0,0 +1,2 @@ +# Copyright (c) 2012, GPy authors (see AUTHORS.txt). +# Licensed under the BSD 3-clause license (see LICENSE.txt) diff --git a/GPy/kern/_src/rbf_psi_comp/ssrbf_psi_comp.py b/GPy/kern/_src/rbf_psi_comp/ssrbf_psi_comp.py new file mode 100644 index 00000000..f3d5ee6b --- /dev/null +++ b/GPy/kern/_src/rbf_psi_comp/ssrbf_psi_comp.py @@ -0,0 +1,111 @@ +# Copyright (c) 2012, GPy authors (see AUTHORS.txt). +# Licensed under the BSD 3-clause license (see LICENSE.txt) + +""" +The package for the psi statistics computation +""" + +import numpy as np + +def _Z_distances(Z): + Zhat = 0.5 * (Z[:, None, :] + Z[None, :, :]) # M,M,Q + Zdist = 0.5 * (Z[:, None, :] - Z[None, :, :]) # M,M,Q + return Zhat, Zdist + +# def _psi1computations(self, Z, vp): +# mu, S = vp.mean, vp.variance +# l2 = lengthscale **2 +# denom = S[:, None, :] / l2 + 1. # N,1,Q +# dist = Z[None, :, :] - mu[:, None, :] # N,M,Q +# dist_sq = np.square(dist) / l2 / denom # N,M,Q +# exponent = -0.5 * np.sum(dist_sq + np.log(denom), -1)#N,M +# psi1 = self.variance * np.exp(exponent) # N,M +# return denom, dist, dist_sq, psi1 + +def _psi1computations(variance, lengthscale, Z, mu, S, gamma): + """ + Z - MxQ + mu - NxQ + S - NxQ + gamma - NxQ + """ + # here are the "statistics" for psi1 and psi2 + # Produced intermediate results: + # _psi1 NxM + # _dpsi1_dvariance NxM + # _dpsi1_dlengthscale NxMxQ + # _dpsi1_dZ NxMxQ + # _dpsi1_dgamma NxMxQ + # _dpsi1_dmu NxMxQ + # _dpsi1_dS NxMxQ + + lengthscale2 = np.square(lengthscale) + + # psi1 + _psi1_denom = S[:, None, :] / lengthscale2 + 1. # Nx1xQ + _psi1_denom_sqrt = np.sqrt(_psi1_denom) #Nx1xQ + _psi1_dist = Z[None, :, :] - mu[:, None, :] # NxMxQ + _psi1_dist_sq = np.square(_psi1_dist) / (lengthscale2 * _psi1_denom) # NxMxQ + _psi1_common = gamma[:,None,:] / (lengthscale2*_psi1_denom*_psi1_denom_sqrt) #Nx1xQ + _psi1_exponent1 = np.log(gamma[:,None,:]) -0.5 * (_psi1_dist_sq + np.log(_psi1_denom)) # NxMxQ + _psi1_exponent2 = np.log(1.-gamma[:,None,:]) -0.5 * (np.square(Z[None,:,:])/lengthscale2) # NxMxQ + _psi1_exponent = np.log(np.exp(_psi1_exponent1) + np.exp(_psi1_exponent2)) #NxMxQ + _psi1_exp_sum = _psi1_exponent.sum(axis=-1) #NxM + _psi1_exp_dist_sq = np.exp(-0.5*_psi1_dist_sq) # NxMxQ + _psi1_exp_Z = np.exp(-0.5*np.square(Z[None,:,:])/lengthscale2) # 1xMxQ + _psi1_q = variance * np.exp(_psi1_exp_sum[:,:,None] - _psi1_exponent) # NxMxQ + _psi1 = variance * np.exp(_psi1_exp_sum) # NxM + _dpsi1_dvariance = _psi1 / variance # NxM + _dpsi1_dgamma = _psi1_q * (_psi1_exp_dist_sq/_psi1_denom_sqrt-_psi1_exp_Z) # NxMxQ + _dpsi1_dmu = _psi1_q * (_psi1_exp_dist_sq * _psi1_dist * _psi1_common) # NxMxQ + _dpsi1_dS = _psi1_q * (_psi1_exp_dist_sq * _psi1_common * 0.5 * (_psi1_dist_sq - 1.)) # NxMxQ + _dpsi1_dZ = _psi1_q * (- _psi1_common * _psi1_dist * _psi1_exp_dist_sq - (1-gamma[:,None,:])/lengthscale2*Z[None,:,:]*_psi1_exp_Z) # NxMxQ + _dpsi1_dlengthscale = 2.*lengthscale*_psi1_q * (0.5*_psi1_common*(S[:,None,:]/lengthscale2+_psi1_dist_sq)*_psi1_exp_dist_sq + 0.5*(1-gamma[:,None,:])*np.square(Z[None,:,:]/lengthscale2)*_psi1_exp_Z) # NxMxQ + + return _psi1, _dpsi1_dvariance, _dpsi1_dgamma, _dpsi1_dmu, _dpsi1_dS, _dpsi1_dZ, _dpsi1_dlengthscale + +def _psi2computations(variance, lengthscale, Z, mu, S, gamma): + """ + Z - MxQ + mu - NxQ + S - NxQ + gamma - NxQ + """ + # here are the "statistics" for psi1 and psi2 + # Produced intermediate results: + # _psi2 NxMxM + # _psi2_dvariance NxMxM + # _psi2_dlengthscale NxMxMxQ + # _psi2_dZ NxMxMxQ + # _psi2_dgamma NxMxMxQ + # _psi2_dmu NxMxMxQ + # _psi2_dS NxMxMxQ + + lengthscale2 = np.square(lengthscale) + + _psi2_Zhat, _psi2_Zdist = _Z_distances(Z) + _psi2_Zdist_sq = np.square(_psi2_Zdist / lengthscale) # M,M,Q + _psi2_Z_sq_sum = (np.square(Z[:,None,:])+np.square(Z[None,:,:]))/lengthscale2 # MxMxQ + + # psi2 + _psi2_denom = 2.*S[:, None, None, :] / lengthscale2 + 1. # Nx1x1xQ + _psi2_denom_sqrt = np.sqrt(_psi2_denom) + _psi2_mudist = mu[:,None,None,:]-_psi2_Zhat #N,M,M,Q + _psi2_mudist_sq = np.square(_psi2_mudist)/(lengthscale2*_psi2_denom) + _psi2_common = gamma[:,None,None,:]/(lengthscale2 * _psi2_denom * _psi2_denom_sqrt) # Nx1x1xQ + _psi2_exponent1 = -_psi2_Zdist_sq -_psi2_mudist_sq -0.5*np.log(_psi2_denom)+np.log(gamma[:,None,None,:]) #N,M,M,Q + _psi2_exponent2 = np.log(1.-gamma[:,None,None,:]) - 0.5*(_psi2_Z_sq_sum) # NxMxMxQ + _psi2_exponent = np.log(np.exp(_psi2_exponent1) + np.exp(_psi2_exponent2)) + _psi2_exp_sum = _psi2_exponent.sum(axis=-1) #NxM + _psi2_q = np.square(variance) * np.exp(_psi2_exp_sum[:,:,:,None]-_psi2_exponent) # NxMxMxQ + _psi2_exp_dist_sq = np.exp(-_psi2_Zdist_sq -_psi2_mudist_sq) # NxMxMxQ + _psi2_exp_Z = np.exp(-0.5*_psi2_Z_sq_sum) # MxMxQ + _psi2 = np.square(variance) * np.exp(_psi2_exp_sum) # N,M,M + _dpsi2_dvariance = 2. * _psi2/variance # NxMxM + _dpsi2_dgamma = _psi2_q * (_psi2_exp_dist_sq/_psi2_denom_sqrt - _psi2_exp_Z) # NxMxMxQ + _dpsi2_dmu = _psi2_q * (-2.*_psi2_common*_psi2_mudist * _psi2_exp_dist_sq) # NxMxMxQ + _dpsi2_dS = _psi2_q * (_psi2_common * (2.*_psi2_mudist_sq - 1.) * _psi2_exp_dist_sq) # NxMxMxQ + _dpsi2_dZ = 2.*_psi2_q * (_psi2_common*(-_psi2_Zdist*_psi2_denom+_psi2_mudist)*_psi2_exp_dist_sq - (1-gamma[:,None,None,:])*Z[:,None,:]/lengthscale2*_psi2_exp_Z) # NxMxMxQ + _dpsi2_dlengthscale = 2.*lengthscale* _psi2_q * (_psi2_common*(S[:,None,None,:]/lengthscale2+_psi2_Zdist_sq*_psi2_denom+_psi2_mudist_sq)*_psi2_exp_dist_sq+(1-gamma[:,None,None,:])*_psi2_Z_sq_sum*0.5/lengthscale2*_psi2_exp_Z) # NxMxMxQ + + return _psi2, _dpsi2_dvariance, _dpsi2_dgamma, _dpsi2_dmu, _dpsi2_dS, _dpsi2_dZ, _dpsi2_dlengthscale diff --git a/GPy/kern/_src/ssrbf.py b/GPy/kern/_src/ssrbf.py index cd921acb..391ef1c7 100644 --- a/GPy/kern/_src/ssrbf.py +++ b/GPy/kern/_src/ssrbf.py @@ -7,6 +7,7 @@ import numpy as np from ...util.linalg import tdot from ...util.config import * from stationary import Stationary +from rbf_psi_comp import ssrbf_psi_comp class SSRBF(Stationary): """ @@ -54,101 +55,63 @@ class SSRBF(Stationary): # PSI statistics # #---------------------------------------# - def psi0(self, Z, posterior_variational): - ret = np.empty(posterior_variational.mean.shape[0]) + def psi0(self, Z, variational_posterior): + ret = np.empty(variational_posterior.mean.shape[0]) ret[:] = self.variance return ret - def psi1(self, Z, posterior_variational): - self._psi_computations(Z, posterior_variational.mean, posterior_variational.variance, posterior_variational.binary_prob) - return self._psi1 + def psi1(self, Z, variational_posterior): + _psi1, _, _, _, _, _, _ = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob) + return _psi1 - def psi2(self, Z, posterior_variational): - self._psi_computations(Z, posterior_variational.mean, posterior_variational.variance, posterior_variational.binary_prob) - return self._psi2 + def psi2(self, Z, variational_posterior): + _psi2, _, _, _, _, _, _ = ssrbf_psi_comp._psi2computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob) + return _psi2 - def dL_dpsi0_dmuSgamma(self, dL_dpsi0, Z, mu, S, gamma, target_mu, target_S, target_gamma): - pass - - - def dL_dpsi1_dmuSgamma(self, dL_dpsi1, Z, mu, S, gamma, target_mu, target_S, target_gamma): - self._psi_computations(Z, mu, S, gamma) - target_mu += (dL_dpsi1[:, :, None] * self._dpsi1_dmu).sum(axis=1) - target_S += (dL_dpsi1[:, :, None] * self._dpsi1_dS).sum(axis=1) - target_gamma += (dL_dpsi1[:,:,None] * self._dpsi1_dgamma).sum(axis=1) - - - def dL_dpsi2_dmuSgamma(self, dL_dpsi2, Z, mu, S, gamma, target_mu, target_S, target_gamma): - """Think N,num_inducing,num_inducing,input_dim """ - self._psi_computations(Z, mu, S, gamma) - target_mu += (dL_dpsi2[:, :, :, None] * self._dpsi2_dmu).reshape(mu.shape[0],-1,mu.shape[1]).sum(axis=1) - target_S += (dL_dpsi2[:, :, :, None] * self._dpsi2_dS).reshape(S.shape[0],-1,S.shape[1]).sum(axis=1) - target_gamma += (dL_dpsi2[:,:,:, None] *self._dpsi2_dgamma).reshape(gamma.shape[0],-1,gamma.shape[1]).sum(axis=1) - - def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, posterior_variational): - self._psi_computations(Z, posterior_variational.mean, posterior_variational.variance, posterior_variational.binary_prob) + def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): + _, _dpsi1_dvariance, _, _, _, _, _dpsi1_dlengthscale = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob) + _, _dpsi2_dvariance, _, _, _, _, _dpsi2_dlengthscale = ssrbf_psi_comp._psi2computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob) #contributions from psi0: self.variance.gradient = np.sum(dL_dpsi0) #from psi1 - self.variance.gradient += np.sum(dL_dpsi1 * self._dpsi1_dvariance) - self.lengthscale.gradient = (dL_dpsi1[:,:,None]*self._dpsi1_dlengthscale).reshape(-1,self.input_dim).sum(axis=0) + self.variance.gradient += np.sum(dL_dpsi1 * _dpsi1_dvariance) + self.lengthscale.gradient = (dL_dpsi1[:,:,None]*_dpsi1_dlengthscale).reshape(-1,self.input_dim).sum(axis=0) #from psi2 - self.variance.gradient += (dL_dpsi2 * self._dpsi2_dvariance).sum() - self.lengthscale.gradient += (dL_dpsi2[:,:,:,None] * self._dpsi2_dlengthscale).reshape(-1,self.input_dim).sum(axis=0) - - #from Kmm - self._K_computations(Z, None) - dvardLdK = self._K_dvar * dL_dKmm - var_len3 = self.variance / (np.square(self.lengthscale)*self.lengthscale) - - self.variance.gradient += np.sum(dvardLdK) - self.lengthscale.gradient += (np.square(Z[:,None,:]-Z[None,:,:])*dvardLdK[:,:,None]).reshape(-1,self.input_dim).sum(axis=0)*var_len3 + self.variance.gradient += (dL_dpsi2 * _dpsi2_dvariance).sum() + self.lengthscale.gradient += (dL_dpsi2[:,:,:,None] * _dpsi2_dlengthscale).reshape(-1,self.input_dim).sum(axis=0) - - def gradients_Z_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, posterior_variational): - self._psi_computations(Z, posterior_variational.mean, posterior_variational.variance, posterior_variational.binary_prob) + def gradients_Z_expectations(self, dL_dpsi1, dL_dpsi2, Z, variational_posterior): + _, _, _, _, _, _dpsi1_dZ, _ = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob) + _, _, _, _, _, _dpsi2_dZ, _ = ssrbf_psi_comp._psi2computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob) #psi1 - grad = (dL_dpsi1[:, :, None] * self._dpsi1_dZ).sum(axis=0) + grad = (dL_dpsi1[:, :, None] * _dpsi1_dZ).sum(axis=0) #psi2 - grad += (dL_dpsi2[:, :, :, None] * self._dpsi2_dZ).sum(axis=0).sum(axis=1) - - grad += self.gradients_X(dL_dKmm, Z, None) + grad += (dL_dpsi2[:, :, :, None] * _dpsi2_dZ).sum(axis=0).sum(axis=1) return grad - def gradients_q_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, posterior_variational): - ndata = posterior_variational.mean.shape[0] - self._psi_computations(Z, posterior_variational.mean, posterior_variational.variance, posterior_variational.binary_prob) + def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): + ndata = variational_posterior.mean.shape[0] + + _, _, _dpsi1_dgamma, _dpsi1_dmu, _dpsi1_dS, _, _ = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob) + _, _, _dpsi2_dgamma, _dpsi2_dmu, _dpsi2_dS, _, _ = ssrbf_psi_comp._psi2computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob) + #psi1 - grad_mu = (dL_dpsi1[:, :, None] * self._dpsi1_dmu).sum(axis=1) - grad_S = (dL_dpsi1[:, :, None] * self._dpsi1_dS).sum(axis=1) - grad_gamma = (dL_dpsi1[:,:,None] * self._dpsi1_dgamma).sum(axis=1) + grad_mu = (dL_dpsi1[:, :, None] * _dpsi1_dmu).sum(axis=1) + grad_S = (dL_dpsi1[:, :, None] * _dpsi1_dS).sum(axis=1) + grad_gamma = (dL_dpsi1[:,:,None] * _dpsi1_dgamma).sum(axis=1) #psi2 - grad_mu += (dL_dpsi2[:, :, :, None] * self._dpsi2_dmu).reshape(ndata,-1,self.input_dim).sum(axis=1) - grad_S += (dL_dpsi2[:, :, :, None] * self._dpsi2_dS).reshape(ndata,-1,self.input_dim).sum(axis=1) - grad_gamma += (dL_dpsi2[:,:,:, None] *self._dpsi2_dgamma).reshape(ndata,-1,self.input_dim).sum(axis=1) + grad_mu += (dL_dpsi2[:, :, :, None] * _dpsi2_dmu).reshape(ndata,-1,self.input_dim).sum(axis=1) + grad_S += (dL_dpsi2[:, :, :, None] * _dpsi2_dS).reshape(ndata,-1,self.input_dim).sum(axis=1) + grad_gamma += (dL_dpsi2[:,:,:, None] * _dpsi2_dgamma).reshape(ndata,-1,self.input_dim).sum(axis=1) return grad_mu, grad_S, grad_gamma - - def gradients_X(self, dL_dK, X, X2=None): - #if self._X is None or X.base is not self._X.base or X2 is not None: - if X2==None: - _K_dist = X[:,None,:] - X[None,:,:] - _K_dist2 = np.square(_K_dist/self.lengthscale).sum(axis=-1) - dK_dX = self.variance*np.exp(-0.5 * self._K_dist2[:,:,None]) * (-2.*_K_dist/np.square(self.lengthscale)) - dL_dX = (dL_dK[:,:,None] * dK_dX).sum(axis=1) - else: - _K_dist = X[:,None,:] - X2[None,:,:] - _K_dist2 = np.square(_K_dist/self.lengthscale).sum(axis=-1) - dK_dX = self.variance*np.exp(-0.5 * self._K_dist2[:,:,None]) * (-_K_dist/np.square(self.lengthscale)) - dL_dX = (dL_dK[:,:,None] * dK_dX).sum(axis=1) - return dL_dX #---------------------------------------# # Precomputations # @@ -174,78 +137,3 @@ class SSRBF(Stationary): self._K_dist2 = -2.*np.dot(X, X2.T) + (np.sum(np.square(X), axis=1)[:, None] + np.sum(np.square(X2), axis=1)[None, :]) self._K_dvar = np.exp(-0.5 * self._K_dist2) - #@cache_this(1) - def _psi_computations(self, Z, mu, S, gamma): - """ - Z - MxQ - mu - NxQ - S - NxQ - gamma - NxQ - """ - # here are the "statistics" for psi1 and psi2 - # Produced intermediate results: - # _psi1 NxM - # _dpsi1_dvariance NxM - # _dpsi1_dlengthscale NxMxQ - # _dpsi1_dZ NxMxQ - # _dpsi1_dgamma NxMxQ - # _dpsi1_dmu NxMxQ - # _dpsi1_dS NxMxQ - # _psi2 NxMxM - # _psi2_dvariance NxMxM - # _psi2_dlengthscale NxMxMxQ - # _psi2_dZ NxMxMxQ - # _psi2_dgamma NxMxMxQ - # _psi2_dmu NxMxMxQ - # _psi2_dS NxMxMxQ - - lengthscale2 = np.square(self.lengthscale) - - _psi2_Zhat = 0.5 * (Z[:, None, :] + Z[None, :, :]) # M,M,Q - _psi2_Zdist = 0.5 * (Z[:, None, :] - Z[None, :, :]) # M,M,Q - _psi2_Zdist_sq = np.square(_psi2_Zdist / self.lengthscale) # M,M,Q - _psi2_Z_sq_sum = (np.square(Z[:,None,:])+np.square(Z[None,:,:]))/lengthscale2 # MxMxQ - - # psi1 - _psi1_denom = S[:, None, :] / lengthscale2 + 1. # Nx1xQ - _psi1_denom_sqrt = np.sqrt(_psi1_denom) #Nx1xQ - _psi1_dist = Z[None, :, :] - mu[:, None, :] # NxMxQ - _psi1_dist_sq = np.square(_psi1_dist) / (lengthscale2 * _psi1_denom) # NxMxQ - _psi1_common = gamma[:,None,:] / (lengthscale2*_psi1_denom*_psi1_denom_sqrt) #Nx1xQ - _psi1_exponent1 = np.log(gamma[:,None,:]) -0.5 * (_psi1_dist_sq + np.log(_psi1_denom)) # NxMxQ - _psi1_exponent2 = np.log(1.-gamma[:,None,:]) -0.5 * (np.square(Z[None,:,:])/lengthscale2) # NxMxQ - _psi1_exponent = np.log(np.exp(_psi1_exponent1) + np.exp(_psi1_exponent2)) #NxMxQ - _psi1_exp_sum = _psi1_exponent.sum(axis=-1) #NxM - _psi1_exp_dist_sq = np.exp(-0.5*_psi1_dist_sq) # NxMxQ - _psi1_exp_Z = np.exp(-0.5*np.square(Z[None,:,:])/lengthscale2) # 1xMxQ - _psi1_q = self.variance * np.exp(_psi1_exp_sum[:,:,None] - _psi1_exponent) # NxMxQ - self._psi1 = self.variance * np.exp(_psi1_exp_sum) # NxM - self._dpsi1_dvariance = self._psi1 / self.variance # NxM - self._dpsi1_dgamma = _psi1_q * (_psi1_exp_dist_sq/_psi1_denom_sqrt-_psi1_exp_Z) # NxMxQ - self._dpsi1_dmu = _psi1_q * (_psi1_exp_dist_sq * _psi1_dist * _psi1_common) # NxMxQ - self._dpsi1_dS = _psi1_q * (_psi1_exp_dist_sq * _psi1_common * 0.5 * (_psi1_dist_sq - 1.)) # NxMxQ - self._dpsi1_dZ = _psi1_q * (- _psi1_common * _psi1_dist * _psi1_exp_dist_sq - (1-gamma[:,None,:])/lengthscale2*Z[None,:,:]*_psi1_exp_Z) # NxMxQ - self._dpsi1_dlengthscale = 2.*self.lengthscale*_psi1_q * (0.5*_psi1_common*(S[:,None,:]/lengthscale2+_psi1_dist_sq)*_psi1_exp_dist_sq + 0.5*(1-gamma[:,None,:])*np.square(Z[None,:,:]/lengthscale2)*_psi1_exp_Z) # NxMxQ - - - # psi2 - _psi2_denom = 2.*S[:, None, None, :] / lengthscale2 + 1. # Nx1x1xQ - _psi2_denom_sqrt = np.sqrt(_psi2_denom) - _psi2_mudist = mu[:,None,None,:]-_psi2_Zhat #N,M,M,Q - _psi2_mudist_sq = np.square(_psi2_mudist)/(lengthscale2*_psi2_denom) - _psi2_common = gamma[:,None,None,:]/(lengthscale2 * _psi2_denom * _psi2_denom_sqrt) # Nx1x1xQ - _psi2_exponent1 = -_psi2_Zdist_sq -_psi2_mudist_sq -0.5*np.log(_psi2_denom)+np.log(gamma[:,None,None,:]) #N,M,M,Q - _psi2_exponent2 = np.log(1.-gamma[:,None,None,:]) - 0.5*(_psi2_Z_sq_sum) # NxMxMxQ - _psi2_exponent = np.log(np.exp(_psi2_exponent1) + np.exp(_psi2_exponent2)) - _psi2_exp_sum = _psi2_exponent.sum(axis=-1) #NxM - _psi2_q = np.square(self.variance) * np.exp(_psi2_exp_sum[:,:,:,None]-_psi2_exponent) # NxMxMxQ - _psi2_exp_dist_sq = np.exp(-_psi2_Zdist_sq -_psi2_mudist_sq) # NxMxMxQ - _psi2_exp_Z = np.exp(-0.5*_psi2_Z_sq_sum) # MxMxQ - self._psi2 = np.square(self.variance) * np.exp(_psi2_exp_sum) # N,M,M - self._dpsi2_dvariance = 2. * self._psi2/self.variance # NxMxM - self._dpsi2_dgamma = _psi2_q * (_psi2_exp_dist_sq/_psi2_denom_sqrt - _psi2_exp_Z) # NxMxMxQ - self._dpsi2_dmu = _psi2_q * (-2.*_psi2_common*_psi2_mudist * _psi2_exp_dist_sq) # NxMxMxQ - self._dpsi2_dS = _psi2_q * (_psi2_common * (2.*_psi2_mudist_sq - 1.) * _psi2_exp_dist_sq) # NxMxMxQ - self._dpsi2_dZ = 2.*_psi2_q * (_psi2_common*(-_psi2_Zdist*_psi2_denom+_psi2_mudist)*_psi2_exp_dist_sq - (1-gamma[:,None,None,:])*Z[:,None,:]/lengthscale2*_psi2_exp_Z) # NxMxMxQ - self._dpsi2_dlengthscale = 2.*self.lengthscale* _psi2_q * (_psi2_common*(S[:,None,None,:]/lengthscale2+_psi2_Zdist_sq*_psi2_denom+_psi2_mudist_sq)*_psi2_exp_dist_sq+(1-gamma[:,None,None,:])*_psi2_Z_sq_sum*0.5/lengthscale2*_psi2_exp_Z) # NxMxMxQ - \ No newline at end of file diff --git a/GPy/models/bayesian_gplvm.py b/GPy/models/bayesian_gplvm.py index 18a08e5d..8763426a 100644 --- a/GPy/models/bayesian_gplvm.py +++ b/GPy/models/bayesian_gplvm.py @@ -150,37 +150,6 @@ class BayesianGPLVM(SparseGP): return dim_reduction_plots.plot_steepest_gradient_map(self,*args,**kwargs) -class BayesianGPLVMWithMissingData(BayesianGPLVM): - def __init__(self, Y, input_dim, X=None, X_variance=None, init='PCA', num_inducing=10, - Z=None, kernel=None, inference_method=None, likelihood=None, name='bayesian gplvm', **kwargs): - from ..util.subarray_and_sorting import common_subarrays - self.subarrays = common_subarrays(Y) - import ipdb;ipdb.set_trace() - BayesianGPLVM.__init__(self, Y, input_dim, X=X, X_variance=X_variance, init=init, num_inducing=num_inducing, Z=Z, kernel=kernel, inference_method=inference_method, likelihood=likelihood, name=name, **kwargs) - - - def parameters_changed(self): - super(BayesianGPLVM, self).parameters_changed() - self._log_marginal_likelihood -= self.KL_divergence() - - dL_dmu, dL_dS = self.dL_dmuS() - - # dL: - self.X.mean.gradient = dL_dmu - self.X.variance.gradient = dL_dS - - # dKL: - self.X.mean.gradient -= self.X.mean - self.X.variance.gradient -= (1. - (1. / (self.X.variance))) * 0.5 - -if __name__ == '__main__': - import numpy as np - X = np.random.randn(20,2) - W = np.linspace(0,1,10)[None,:] - Y = (X*W).sum(1) - missing = np.random.binomial(1,.1,size=Y.shape) - - pass def latent_cost_and_grad(mu_S, kern, Z, dL_dpsi0, dL_dpsi1, dL_dpsi2): """ diff --git a/GPy/models/ss_gplvm.py b/GPy/models/ss_gplvm.py index f21da605..94682c74 100644 --- a/GPy/models/ss_gplvm.py +++ b/GPy/models/ss_gplvm.py @@ -58,7 +58,7 @@ class SSGPLVM(SparseGP): super(SSGPLVM, self).parameters_changed() self._log_marginal_likelihood -= self.variational_prior.KL_divergence(self.X) - self.X.mean.gradient, self.X.variance.gradient, self.X.binary_prob.gradient = self.kern.gradients_q_variational(posterior_variational=self.X, Z=self.Z, **self.grad_dict) + self.X.mean.gradient, self.X.variance.gradient, self.X.binary_prob.gradient = self.kern.gradients_qX_expectations(variational_posterior=self.X, Z=self.Z, **self.grad_dict) # update for the KL divergence self.variational_prior.update_gradients_KL(self.X)