[ssmrd] implement with IBP prior

Zhenwen Dai 2015-05-22 14:29:53 +01:00
parent afa0621488
commit b5cac9af8c
3 changed files with 262 additions and 27 deletions

GPy/core/parameterization/variational.py

@@ -80,7 +80,7 @@ class SpikeAndSlabPrior(VariationalPrior):
         pi = self.pi
         if self.group_spike:
-            dgamma = np.log((1-pi)/pi*gamma/(1.-gamma))/mu.shape[0]
+            dgamma = np.log((1-pi)/pi*gamma/(1.-gamma))/variational_posterior.num_data
         else:
             dgamma = np.log((1-pi)/pi*gamma/(1.-gamma))
         variational_posterior.binary_prob.gradient -= dgamma+((np.square(mu)+S)/self.variance-np.log(S)+np.log(self.variance)-1.)/2.
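
A note on the hunk above (my reading of the change, not stated in the commit message): under group_spike the spike probability is shared by every row of the data, so each row's KL gradient should carry only 1/N of the shared prior term. Dividing by mu.shape[0] is correct only when the posterior holds all N rows; under the VarDTC_minibatch inference path mu can be a batch slice, so the hard-coded divisor over-counts the prior. A minimal numeric sketch of the difference, with all values illustrative:

import numpy as np

N, batch = 1000, 50                   # full data size vs. minibatch size
gamma, pi = 0.7, 0.5                  # shared spike probability and its prior
prior_term = np.log((1 - pi) / pi * gamma / (1. - gamma))
per_row_num_data = prior_term / N     # what variational_posterior.num_data yields
per_row_shape = prior_term / batch    # what mu.shape[0] yields on a minibatch
assert np.isclose(per_row_shape, 20 * per_row_num_data)  # prior over-counted 20x
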
@@ -165,12 +165,16 @@ class SpikeAndSlabPosterior(VariationalPosterior):
     '''
     The SpikeAndSlab distribution for variational approximations.
     '''
-    def __init__(self, means, variances, binary_prob, group_spike=False, name='latent space'):
+    def __init__(self, means, variances, binary_prob, group_spike=False, sharedX=False, name='latent space'):
         """
         binary_prob : the probability of the distribution on the slab part.
         """
         super(SpikeAndSlabPosterior, self).__init__(means, variances, name)
         self.group_spike = group_spike
+        self.sharedX = sharedX
+        if sharedX:
+            self.mean.fix(warning=False)
+            self.variance.fix(warning=False)
         if group_spike:
             self.gamma_group = Param("binary_prob_group",binary_prob.mean(axis=0),Logistic(1e-6,1.-1e-6))
             self.gamma = Param("binary_prob",binary_prob, __fixed__)

@@ -181,7 +185,7 @@ class SpikeAndSlabPosterior(VariationalPosterior):
     def propogate_val(self):
         if self.group_spike:
-            self.gamma.param_array.values.reshape(self.gamma.shape)[:] = self.gamma_group.values
+            self.gamma.values[:] = self.gamma_group.values

     def collate_gradient(self):
         if self.group_spike:
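
The new sharedX flag fixes the posterior's mean and variance so that, when several models share one latent space, each sub-model's copy is excluded from the optimiser while its gradient arrays are still filled in; a parent model can then push one set of values down and sum the gradients back up. This is the pattern the SSMRD class in the third file implements via _propogate_X_val and _collate_X_gradient. A minimal, framework-free sketch of that pattern (class and attribute names here are mine, for illustration only):

import numpy as np

class ChildPosterior(object):
    """Stands in for a sub-model's fixed latent posterior."""
    def __init__(self, n, q):
        self.mean = np.zeros((n, q))
        self.mean_gradient = np.zeros((n, q))

class SharedLatent(object):
    """Stands in for the parent's single free copy of X."""
    def __init__(self, children):
        self.children = children
        self.mean = np.random.randn(*children[0].mean.shape)
        self.mean_gradient = np.zeros_like(self.mean)

    def propagate(self):              # cf. SSMRD._propogate_X_val below
        for c in self.children:
            c.mean[:] = self.mean     # children never diverge from the parent

    def collate(self):                # cf. SSMRD._collate_X_gradient below
        self.mean_gradient[:] = 0.
        for c in self.children:
            self.mean_gradient += c.mean_gradient  # sum per-model gradients

shared = SharedLatent([ChildPosterior(5, 2) for _ in range(3)])
shared.propagate()
shared.collate()
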

GPy/models/ss_gplvm.py

@@ -15,12 +15,16 @@ class IBPPosterior(SpikeAndSlabPosterior):
     '''
     The SpikeAndSlab distribution for variational approximations.
     '''
-    def __init__(self, means, variances, binary_prob, tau=None, name='latent space'):
+    def __init__(self, means, variances, binary_prob, tau=None, sharedX=False, name='latent space'):
         """
         binary_prob : the probability of the distribution on the slab part.
         """
         from ..core.parameterization.transformations import Logexp
         super(IBPPosterior, self).__init__(means, variances, binary_prob, group_spike=True, name=name)
+        self.sharedX = sharedX
+        if sharedX:
+            self.mean.fix(warning=False)
+            self.variance.fix(warning=False)
         self.tau = Param("tau_", np.ones((self.gamma_group.shape[0],2)), Logexp())
         self.link_parameter(self.tau)

@@ -83,7 +87,7 @@ class IBPPrior(VariationalPrior):
         variational_posterior.mean.gradient -= gamma*mu/self.variance
         variational_posterior.variance.gradient -= (1./self.variance - 1./S) * gamma /2.
         from scipy.special import digamma,polygamma
-        dgamma = (np.log(gamma/(1.-gamma))+ digamma(tau[:,1])-digamma(tau[:,0]))/mu.shape[0]
+        dgamma = (np.log(gamma/(1.-gamma))+ digamma(tau[:,1])-digamma(tau[:,0]))/variational_posterior.num_data
         variational_posterior.binary_prob.gradient -= dgamma+((np.square(mu)+S)/self.variance-np.log(S)+np.log(self.variance)-1.)/2.
         ad = self.alpha/self.input_dim
         common = (ad+2-tau[:,0]-tau[:,1])*polygamma(1,tau.sum(axis=1))
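
For context, a reconstruction (mine, inferred from the code rather than stated in the commit) of the term these digamma/polygamma expressions differentiate: the finite approximation to the IBP puts a Beta prior p(\pi_q) = \mathrm{Beta}(a_d, 1) on each stick weight, with a_d = \alpha/Q, and the variational posterior is q(\pi_q) = \mathrm{Beta}(\tau_{q,1}, \tau_{q,2}), so the per-dimension Beta KL is

\mathrm{KL}\big(q(\pi_q)\,\|\,p(\pi_q)\big) = \ln\frac{B(a_d,1)}{B(\tau_{q,1},\tau_{q,2})} + (\tau_{q,1}-a_d)\,\psi(\tau_{q,1}) + (\tau_{q,2}-1)\,\psi(\tau_{q,2}) + (a_d+1-\tau_{q,1}-\tau_{q,2})\,\psi(\tau_{q,1}+\tau_{q,2})

where \psi is the digamma function and B the Beta function. The gradient in the hunk also folds in the E_q[\ln\pi_q] and E_q[\ln(1-\pi_q)] expectations from the Bernoulli part of the bound; differentiating those contributes one extra \psi'(\tau_{q,1}+\tau_{q,2}) term, which is where the a_d+2 (rather than a_d+1) coefficient in common comes from.
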
@@ -103,9 +107,11 @@ class SSGPLVM(SparseGP_MPI):
     """
     def __init__(self, Y, input_dim, X=None, X_variance=None, Gamma=None, init='PCA', num_inducing=10,
-                 Z=None, kernel=None, inference_method=None, likelihood=None, name='Spike_and_Slab GPLVM', group_spike=False, IBP=False, alpha=2., tau=None, mpi_comm=None, pi=None, learnPi=False,normalizer=False, **kwargs):
+                 Z=None, kernel=None, inference_method=None, likelihood=None, name='Spike_and_Slab GPLVM', group_spike=False, IBP=False, alpha=2., tau=None, mpi_comm=None, pi=None, learnPi=False,normalizer=False, sharedX=False, variational_prior=None,**kwargs):
         self.group_spike = group_spike
+        self.init = init
+        self.sharedX = sharedX

         if X == None:
             from ..util.initialization import initialize_latent

@@ -113,8 +119,6 @@ class SSGPLVM(SparseGP_MPI):
         else:
             fracs = np.ones(input_dim)

-        self.init = init
-
         if X_variance is None: # The variance of the variational approximation (S)
             X_variance = np.random.uniform(0,.1,X.shape)

@@ -146,11 +150,11 @@ class SSGPLVM(SparseGP_MPI):
             pi[:] = 0.5

         if IBP:
-            self.variational_prior = IBPPrior(input_dim=input_dim, alpha=alpha)
-            X = IBPPosterior(X, X_variance, gamma, tau=tau)
+            self.variational_prior = IBPPrior(input_dim=input_dim, alpha=alpha) if variational_prior is None else variational_prior
+            X = IBPPosterior(X, X_variance, gamma, tau=tau,sharedX=sharedX)
         else:
-            self.variational_prior = SpikeAndSlabPrior(pi=pi,learnPi=learnPi, group_spike=group_spike) # the prior probability of the latent binary variable b
-            X = SpikeAndSlabPosterior(X, X_variance, gamma, group_spike=group_spike)
+            self.variational_prior = SpikeAndSlabPrior(pi=pi,learnPi=learnPi, group_spike=group_spike) if variational_prior is None else variational_prior
+            X = SpikeAndSlabPosterior(X, X_variance, gamma, group_spike=group_spike,sharedX=sharedX)

         super(SSGPLVM,self).__init__(X, Y, Z, kernel, likelihood, variational_prior=self.variational_prior, inference_method=inference_method, name=name, mpi_comm=mpi_comm, normalizer=normalizer, **kwargs)
         self.link_parameter(self.X, index=0)

@@ -163,8 +167,12 @@ class SSGPLVM(SparseGP_MPI):
         """Get the gradients of the posterior distribution of X in its specific form."""
         return X.mean.gradient, X.variance.gradient, X.binary_prob.gradient

+    def _propogate_X_val(self):
+        pass
+
     def parameters_changed(self):
         self.X.propogate_val()
+        if self.sharedX: self._highest_parent_._propogate_X_val()
         super(SSGPLVM,self).parameters_changed()
         if isinstance(self.inference_method, VarDTC_minibatch):
             self.X.collate_gradient()
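
Putting the new constructor arguments together, a minimal usage sketch of the updated SSGPLVM signature (synthetic data; the import path is assumed from this repository's layout):

import numpy as np
from GPy.models.ss_gplvm import SSGPLVM  # import path assumed

Y = np.random.randn(50, 8)               # toy observations
m = SSGPLVM(Y, input_dim=3, num_inducing=10,
            IBP=True, alpha=2.,          # IBP prior over latent dimensions
            sharedX=False)               # sharedX=True is meant for a parent model such as SSMRD
m.optimize(max_iters=10)
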

GPy/models/ss_mrd.py

@@ -2,33 +2,256 @@
 The Manifold Relevance Determination model with the spike-and-slab prior
 """

+import numpy as np
 from ..core import Model
 from .ss_gplvm import SSGPLVM
+from ..core.parameterization.variational import SpikeAndSlabPrior,NormalPosterior,VariationalPrior
+from ..util.misc import param_to_array
+from ..kern import RBF
+from ..core import Param
+from numpy.linalg.linalg import LinAlgError

 class SSMRD(Model):
-    def __init__(self, Ylist, input_dim, X=None, X_variance=None,
-                 initx = 'PCA', initz = 'permute',
-                 num_inducing=10, Z=None, kernel=None,
-                 inference_method=None, likelihoods=None, name='ss_mrd', Ynames=None):
+    def __init__(self, Ylist, input_dim, X=None, X_variance=None, Gammas=None, initx = 'PCA_concat', initz = 'permute',
+                 num_inducing=10, Zs=None, kernels=None, inference_methods=None, likelihoods=None, group_spike=True,
+                 pi=0.5, name='ss_mrd', Ynames=None, mpi_comm=None, IBP=False, alpha=2., taus=None, ):
         super(SSMRD, self).__init__(name)
+        self.mpi_comm = mpi_comm
+        self._PROPAGATE_ = False

-        self.updates = False
-        self.models = [SSGPLVM(y, input_dim, X=X, X_variance=X_variance, num_inducing=num_inducing,Z=Z,init=initx,
-                               kernel=kernel.copy() if kernel else None,inference_method=inference_method,likelihood=likelihoods,
-                               name='model_'+str(i)) for i,y in enumerate(Ylist)]
-        self.add_parameters(*(self.models))
-        [[[self.models[m].X.mean[i,j:j+1].tie('mean_'+str(i)+'_'+str(j)) for m in range(len(self.models))] for j in range(self.models[0].X.mean.shape[1])]
-            for i in range(self.models[0].X.mean.shape[0])]
-        [[[self.models[m].X.variance[i,j:j+1].tie('var_'+str(i)+'_'+str(j)) for m in range(len(self.models))] for j in range(self.models[0].X.variance.shape[1])]
-            for i in range(self.models[0].X.variance.shape[0])]
-        self.updates = True
+        # initialize X for individual models
+        X, X_variance, Gammas, fracs = self._init_X(Ylist, input_dim, X, X_variance, Gammas, initx)
+        self.X = NormalPosterior(means=X, variances=X_variance)
+
+        if kernels is None:
+            kernels = [RBF(input_dim, lengthscale=1./fracs, ARD=True) for i in xrange(len(Ylist))]
+        if Zs is None:
+            Zs = [None]* len(Ylist)
+        if likelihoods is None:
+            likelihoods = [None]* len(Ylist)
+        if inference_methods is None:
+            inference_methods = [None]* len(Ylist)
+
+        if IBP:
+            self.var_priors = [IBPPrior_SSMRD(len(Ylist),input_dim,alpha=alpha) for i in xrange(len(Ylist))]
+        else:
+            self.var_priors = [SpikeAndSlabPrior_SSMRD(nModels=len(Ylist),pi=pi,learnPi=False, group_spike=group_spike) for i in xrange(len(Ylist))]
+        self.models = [SSGPLVM(y, input_dim, X=X.copy(), X_variance=X_variance.copy(), Gamma=Gammas[i], num_inducing=num_inducing,Z=Zs[i], learnPi=False, group_spike=group_spike,
+                               kernel=kernels[i],inference_method=inference_methods[i],likelihood=likelihoods[i], variational_prior=self.var_priors[i], IBP=IBP, tau=None if taus is None else taus[i],
+                               name='model_'+str(i), mpi_comm=mpi_comm, sharedX=True) for i,y in enumerate(Ylist)]
+        self.link_parameters(*(self.models+[self.X]))
+
+    def _propogate_X_val(self):
+        if self._PROPAGATE_: return
+        for m in self.models:
+            m.X.mean.values[:] = self.X.mean.values
+            m.X.variance.values[:] = self.X.variance.values
+        varp_list = [m.X for m in self.models]
+        [vp._update_inernal(varp_list) for vp in self.var_priors]
+        self._PROPAGATE_=True
+
+    def _collate_X_gradient(self):
+        self._PROPAGATE_ = False
+        self.X.mean.gradient[:] = 0
+        self.X.variance.gradient[:] = 0
+        for m in self.models:
+            self.X.mean.gradient += m.X.mean.gradient
+            self.X.variance.gradient += m.X.variance.gradient

     def parameters_changed(self):
         super(SSMRD, self).parameters_changed()
+        [m.parameters_changed() for m in self.models]
         self._log_marginal_likelihood = sum([m._log_marginal_likelihood for m in self.models])
+        self._collate_X_gradient()

     def log_likelihood(self):
         return self._log_marginal_likelihood

+    def _init_X(self, Ylist, input_dim, X=None, X_variance=None, Gammas=None, initx='PCA_concat'):
+        # Divide latent dimensions
+        idx = np.empty((input_dim,),dtype=np.int)
+        residue = (input_dim)%(len(Ylist))
+        for i in xrange(len(Ylist)):
+            if i < residue:
+                size = input_dim/len(Ylist)+1
+                idx[i*size:(i+1)*size] = i
+            else:
+                size = input_dim/len(Ylist)
+                idx[i*size+residue:(i+1)*size+residue] = i
+
+        if X is None:
+            if initx == 'PCA_concat':
+                X = np.empty((Ylist[0].shape[0],input_dim))
+                fracs = np.empty((input_dim,))
+                from ..util.initialization import initialize_latent
+                for i in xrange(len(Ylist)):
+                    Y = Ylist[i]
+                    dim = (idx==i).sum()
+                    if dim>0:
+                        x, fr = initialize_latent('PCA', dim, Y)
+                        X[:,idx==i] = x
+                        fracs[idx==i] = fr
+            elif initx=='PCA_joint':
+                y = np.hstack(Ylist)
+                from ..util.initialization import initialize_latent
+                X, fracs = initialize_latent('PCA', input_dim, y)
+            else:
+                X = np.random.randn(Ylist[0].shape[0], input_dim)
+                fracs = np.ones(input_dim)
+        else:
+            fracs = np.ones(input_dim)
+
+        if X_variance is None: # The variance of the variational approximation (S)
+            X_variance = np.random.uniform(0,.1,X.shape)
+
+        if Gammas is None:
+            Gammas = []
+            for x in X:
+                gamma = np.empty_like(X) # The posterior probabilities of the binary variable in the variational approximation
+                gamma[:] = 0.5 + 0.1 * np.random.randn(X.shape[0], input_dim)
+                gamma[gamma>1.-1e-9] = 1.-1e-9
+                gamma[gamma<1e-9] = 1e-9
+                Gammas.append(gamma)
+        return X, X_variance, Gammas, fracs
+
+    @Model.optimizer_array.setter
+    def optimizer_array(self, p):
+        if self.mpi_comm != None:
+            if self._IN_OPTIMIZATION_ and self.mpi_comm.rank==0:
+                self.mpi_comm.Bcast(np.int32(1),root=0)
+            self.mpi_comm.Bcast(p, root=0)
+        Model.optimizer_array.fset(self,p)
+
+    def optimize(self, optimizer=None, start=None, **kwargs):
+        self._IN_OPTIMIZATION_ = True
+        if self.mpi_comm==None:
+            super(SSMRD, self).optimize(optimizer,start,**kwargs)
+        elif self.mpi_comm.rank==0:
+            super(SSMRD, self).optimize(optimizer,start,**kwargs)
+            self.mpi_comm.Bcast(np.int32(-1),root=0)
+        elif self.mpi_comm.rank>0:
+            x = self.optimizer_array.copy()
+            flag = np.empty(1,dtype=np.int32)
+            while True:
+                self.mpi_comm.Bcast(flag,root=0)
+                if flag==1:
+                    try:
+                        self.optimizer_array = x
+                        self._fail_count = 0
+                    except (LinAlgError, ZeroDivisionError, ValueError):
+                        if self._fail_count >= self._allowed_failures:
+                            raise
+                        self._fail_count += 1
+                elif flag==-1:
+                    break
+                else:
+                    self._IN_OPTIMIZATION_ = False
+                    raise Exception("Unrecognizable flag for synchronization!")
+        self._IN_OPTIMIZATION_ = False
+
+class SpikeAndSlabPrior_SSMRD(SpikeAndSlabPrior):
+    def __init__(self, nModels, pi=0.5, learnPi=False, group_spike=True, variance = 1.0, name='SSMRDPrior', **kw):
+        self.nModels = nModels
+        self._b_prob_all = 0.5
+        super(SpikeAndSlabPrior_SSMRD, self).__init__(pi=pi,learnPi=learnPi,group_spike=group_spike,variance=variance, name=name, **kw)
+
+    def _update_inernal(self, varp_list):
+        """Make an update of the internal status by gathering the variational posteriors of all the individual models."""
+        # The probability that the binary variable for the same latent dimension is off in all of the models.
+        if self.group_spike:
+            self._b_prob_all = 1.-param_to_array(varp_list[0].gamma_group)
+            [np.multiply(self._b_prob_all, 1.-vp.gamma_group, self._b_prob_all) for vp in varp_list[1:]]
+        else:
+            self._b_prob_all = 1.-param_to_array(varp_list[0].binary_prob)
+            [np.multiply(self._b_prob_all, 1.-vp.binary_prob, self._b_prob_all) for vp in varp_list[1:]]
+
+    def KL_divergence(self, variational_posterior):
+        mu = variational_posterior.mean
+        S = variational_posterior.variance
+        if self.group_spike:
+            gamma = variational_posterior.binary_prob[0]
+        else:
+            gamma = variational_posterior.binary_prob
+        if len(self.pi.shape)==2:
+            idx = np.unique(gamma._raveled_index()/gamma.shape[-1])
+            pi = self.pi[idx]
+        else:
+            pi = self.pi
+
+        var_mean = np.square(mu)/self.variance
+        var_S = (S/self.variance - np.log(S))
+        var_gamma = (gamma*np.log(gamma/pi)).sum()+((1-gamma)*np.log((1-gamma)/(1-pi))).sum()
+        return var_gamma +((1.-self._b_prob_all)*(np.log(self.variance)-1. +var_mean + var_S)).sum()/(2.*self.nModels)
+
+    def update_gradients_KL(self, variational_posterior):
+        mu = variational_posterior.mean
+        S = variational_posterior.variance
+        N = variational_posterior.num_data
+        if self.group_spike:
+            gamma = variational_posterior.binary_prob.values[0]
+        else:
+            gamma = variational_posterior.binary_prob.values
+        if len(self.pi.shape)==2:
+            idx = np.unique(gamma._raveled_index()/gamma.shape[-1])
+            pi = self.pi[idx]
+        else:
+            pi = self.pi
+        if self.group_spike:
+            tmp = self._b_prob_all/(1.-gamma)
+            variational_posterior.binary_prob.gradient -= np.log((1-pi)/pi*gamma/(1.-gamma))/N +tmp*((np.square(mu)+S)/self.variance-np.log(S)+np.log(self.variance)-1.)/2.
+        else:
+            variational_posterior.binary_prob.gradient -= np.log((1-pi)/pi*gamma/(1.-gamma))+((np.square(mu)+S)/self.variance-np.log(S)+np.log(self.variance)-1.)/2.
+        mu.gradient -= (1.-self._b_prob_all)*mu/(self.variance*self.nModels)
+        S.gradient -= (1./self.variance - 1./S) * (1.-self._b_prob_all) /(2.*self.nModels)
+        if self.learnPi:
+            raise 'Not Supported!'
+
+class IBPPrior_SSMRD(VariationalPrior):
+    def __init__(self, nModels, input_dim, alpha =2., tau=None, name='IBPPrior', **kw):
+        super(IBPPrior_SSMRD, self).__init__(name=name, **kw)
+        from ..core.parameterization.transformations import Logexp, __fixed__
+        self.nModels = nModels
+        self._b_prob_all = 0.5
+        self.input_dim = input_dim
+        self.variance = 1.
+        self.alpha = Param('alpha', alpha, __fixed__)
+        self.link_parameter(self.alpha)
+
+    def _update_inernal(self, varp_list):
+        """Make an update of the internal status by gathering the variational posteriors of all the individual models."""
+        # The probability that the binary variable for the same latent dimension is off in all of the models.
+        self._b_prob_all = 1.-param_to_array(varp_list[0].gamma_group)
+        [np.multiply(self._b_prob_all, 1.-vp.gamma_group, self._b_prob_all) for vp in varp_list[1:]]
+
+    def KL_divergence(self, variational_posterior):
+        mu, S, gamma, tau = variational_posterior.mean.values, variational_posterior.variance.values, variational_posterior.gamma_group.values, variational_posterior.tau.values
+        var_mean = np.square(mu)/self.variance
+        var_S = (S/self.variance - np.log(S))
+        part1 = ((1.-self._b_prob_all)* (np.log(self.variance)-1. +var_mean + var_S)).sum()/(2.*self.nModels)
+
+        ad = self.alpha/self.input_dim
+        from scipy.special import betaln,digamma
+        part2 = (gamma*np.log(gamma)).sum() + ((1.-gamma)*np.log(1.-gamma)).sum() + (betaln(ad,1.)*self.input_dim -betaln(tau[:,0], tau[:,1]).sum())/self.nModels \
+                + (( (tau[:,0]-ad)/self.nModels -gamma)*digamma(tau[:,0])).sum() + \
+                (((tau[:,1]-1.)/self.nModels+gamma-1.)*digamma(tau[:,1])).sum() + (((1.+ad-tau[:,0]-tau[:,1])/self.nModels+1.)*digamma(tau.sum(axis=1))).sum()
+        return part1+part2
+
+    def update_gradients_KL(self, variational_posterior):
+        mu, S, gamma, tau = variational_posterior.mean.values, variational_posterior.variance.values, variational_posterior.gamma_group.values, variational_posterior.tau.values
+        variational_posterior.mean.gradient -= (1.-self._b_prob_all)*mu/(self.variance*self.nModels)
+        variational_posterior.variance.gradient -= (1./self.variance - 1./S) * (1.-self._b_prob_all) /(2.*self.nModels)
+
+        from scipy.special import digamma,polygamma
+        tmp = self._b_prob_all/(1.-gamma)
+        dgamma = (np.log(gamma/(1.-gamma))+ digamma(tau[:,1])-digamma(tau[:,0]))/variational_posterior.num_data
+        variational_posterior.binary_prob.gradient -= dgamma+tmp*((np.square(mu)+S)/self.variance-np.log(S)+np.log(self.variance)-1.)/2.
+
+        ad = self.alpha/self.input_dim
+        common = ((1.+ad-tau[:,0]-tau[:,1])/self.nModels+1.)*polygamma(1,tau.sum(axis=1))
+        variational_posterior.tau.gradient[:,0] = -(((tau[:,0]-ad)/self.nModels -gamma)*polygamma(1,tau[:,0])+common)
+        variational_posterior.tau.gradient[:,1] = -(((tau[:,1]-1.)/self.nModels+gamma-1.)*polygamma(1,tau[:,1])+common)
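
And the corresponding sketch for the new SSMRD driver: each view gets its own SSGPLVM built with sharedX=True, all tied to the single NormalPosterior latent space held by the parent. Again the data is synthetic and the import path is assumed:

import numpy as np
from GPy.models.ss_mrd import SSMRD  # import path assumed

Y1 = np.random.randn(40, 6)          # two views observed on the same 40 rows
Y2 = np.random.randn(40, 9)
m = SSMRD([Y1, Y2], input_dim=4, num_inducing=10,
          IBP=True, alpha=2.)        # one IBPPrior_SSMRD per view
m.optimize(max_iters=10)

Each prior's _update_inernal gathers the gamma_group values across the views, so the shared relevance term 1 - prod_m(1 - gamma_mq) that enters the KL via _b_prob_all stays in sync whenever _propogate_X_val runs.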