bug fix for tie: merge tie

This commit is contained in:
Zhenwen Dai 2014-10-16 17:33:32 +01:00
parent 31b451494d
commit 84b096d20c
12 changed files with 230 additions and 89 deletions

View file

@ -112,6 +112,8 @@ class Tie(Parameterized):
def _sync_constraints_p(p, tieparam, read, cons):
if p.tie is not None:
p = p._original_
if p is tieparam:
return
labels = np.unique(p.tie)
labels = labels[labels>0]
for l in labels:
@ -146,7 +148,6 @@ class Tie(Parameterized):
if not p.has_parent():
p.ties = Tie()
p.link_parameter(p.ties, -1)
p.add_observer(p.ties, p.ties._parameters_changed_notification, priority=-500)
p.update_model(False)
labels = p.ties._get_labels([p])
@ -157,6 +158,7 @@ class Tie(Parameterized):
p.ties._sync_val([p],toTiedParam=True)
p.ties._sync_constraints([p], toTiedParam=True)
p.ties._update_label_buf()
p.add_observer(p.ties, p.ties._parameters_changed_notification, priority=-500)
p.update_model(True)
def mergeTies(self, p):
@ -168,6 +170,7 @@ class Tie(Parameterized):
self.tied_param[-p.ties.tied_param.size:] = p.ties.tied_param
pairs = zip(self.tied_param.tie,tie_labels)
self._replace_labels(p, pairs)
self._sync_constraints([self._parent_], toTiedParam=True)
p.remove_observer(p.ties)
p.unlink_parameter(p.ties)
del p.ties
@ -184,11 +187,13 @@ class Tie(Parameterized):
labels = self._get_labels([p])
labels = labels[labels>0]
if len(labels)>0:
p._expand_tie_param(len(labels))
idx = np.in1d(self.tied_param.tie,labels)
p.tied_param[:] = self.tied_param[idx]
p.tied_param.tie[:] = self.tied_param.tie[idx]
Tie.recoverTies(p)
# p._expand_tie_param(len(labels))
# idx = np.in1d(self.tied_param.tie,labels)
# p.tied_param[:] = self.tied_param[idx]
# p.tied_param.tie[:] = self.tied_param.tie[idx]
self._remove_unnecessary_ties()
self._sync_constraints([self._parent_], toTiedParam=True)
self._update_label_buf()
p.ties._update_label_buf()
self.update_model(True)

View file

@ -45,8 +45,7 @@ class SpikeAndSlabPrior(VariationalPrior):
self.pi = Param('Pi', pi, __fixed__)
self.link_parameter(self.pi)
def KL_divergence(self, variational_posterior):
def KL_divergence(self, variational_posterior, N=None):
mu = variational_posterior.mean
S = variational_posterior.variance
gamma = variational_posterior.binary_prob
@ -58,14 +57,14 @@ class SpikeAndSlabPrior(VariationalPrior):
var_mean = np.square(mu)/self.variance
var_S = (S/self.variance - np.log(S))
# TODO: sovle group_spike for parallelization
if self.group_spike:
var_gamma = (gamma*np.log(gamma/pi)).sum()/gamma.shape[0]+((1-gamma)*np.log((1-gamma)/(1-pi))).sum()/gamma.shape[0]
assert N is not None
var_gamma = ((gamma*np.log(gamma/pi)).sum()+((1-gamma)*np.log((1-gamma)/(1-pi))).sum())/N
else:
var_gamma = (gamma*np.log(gamma/pi)).sum()+((1-gamma)*np.log((1-gamma)/(1-pi))).sum()
return var_gamma+ (gamma* (np.log(self.variance)-1. +var_mean + var_S)).sum()/2.
def update_gradients_KL(self, variational_posterior):
def update_gradients_KL(self, variational_posterior, N=None):
mu = variational_posterior.mean
S = variational_posterior.variance
gamma = variational_posterior.binary_prob
@ -76,7 +75,8 @@ class SpikeAndSlabPrior(VariationalPrior):
pi = self.pi
if self.group_spike:
gamma.gradient -= np.log((1-pi)/pi*gamma/(1.-gamma))/gamma.shape[0]+((np.square(mu)+S)/self.variance-np.log(S)+np.log(self.variance)-1.)/2.
assert N is not None
gamma.gradient -= np.log((1-pi)/pi*gamma/(1.-gamma))/N+((np.square(mu)+S)/self.variance-np.log(S)+np.log(self.variance)-1.)/2.
else:
gamma.gradient -= np.log((1-pi)/pi*gamma/(1.-gamma))+((np.square(mu)+S)/self.variance-np.log(S)+np.log(self.variance)-1.)/2.
mu.gradient -= gamma*mu/self.variance
@ -84,17 +84,17 @@ class SpikeAndSlabPrior(VariationalPrior):
if self.learnPi:
if len(self.pi)==1:
if self.group_spike:
self.pi.gradient = (gamma/self.pi - (1.-gamma)/(1.-self.pi)).sum()/gamma.shape[0]
self.pi.gradient = (gamma/self.pi - (1.-gamma)/(1.-self.pi)).sum()/N
else:
self.pi.gradient = (gamma/self.pi - (1.-gamma)/(1.-self.pi)).sum()
elif len(self.pi.shape)==1:
if self.group_spike:
self.pi.gradient = (gamma/self.pi - (1.-gamma)/(1.-self.pi)).sum(axis=0)/gamma.shape[0]
self.pi.gradient = (gamma/self.pi - (1.-gamma)/(1.-self.pi)).sum(axis=0)/N
else:
self.pi.gradient = (gamma/self.pi - (1.-gamma)/(1.-self.pi)).sum(axis=0)
else:
if self.group_spike:
self.pi[idx].gradient = (gamma/self.pi[idx] - (1.-gamma)/(1.-self.pi[idx]))/gamma.shape[0]
self.pi[idx].gradient = (gamma/self.pi[idx] - (1.-gamma)/(1.-self.pi[idx]))/N
else:
self.pi[idx].gradient = (gamma/self.pi[idx] - (1.-gamma)/(1.-self.pi[idx]))

View file

@ -36,6 +36,7 @@ class SparseGP_MPI(SparseGP):
def __init__(self, X, Y, Z, kernel, likelihood, variational_prior=None, inference_method=None, name='sparse gp mpi', Y_metadata=None, mpi_comm=None, normalizer=False):
self._IN_OPTIMIZATION_ = False
self.mpi_comm = mpi_comm
if mpi_comm != None:
if inference_method is None:
inference_method = VarDTC_minibatch(mpi_comm=mpi_comm)
@ -49,10 +50,9 @@ class SparseGP_MPI(SparseGP):
self.link_parameter(variational_prior)
# self.X.fix()
self.mpi_comm = mpi_comm
# Manage the data (Y) division
if mpi_comm != None:
from ..util.mpi import divide_data
from ..util.parallel import divide_data
N_start, N_end, N_list = divide_data(Y.shape[0], mpi_comm)
self.N_range = (N_start, N_end)
self.N_list = np.array(N_list)

View file

@ -428,6 +428,27 @@ def mrd_simulation(optimize=True, verbose=True, plot=True, plot_sim=True, **kw):
m.plot_scales("MRD Scales")
return m
def ssmrd_simulation(optimize=True, verbose=True, plot=True, plot_sim=True, **kw):
from GPy import kern
from GPy.models import SSMRD
D1, D2, D3, N, num_inducing, Q = 60, 20, 36, 60, 6, 5
_, _, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim)
#Ylist = [Ylist[0]]
k = kern.RBF(Q, ARD=True)
m = SSMRD(Ylist, input_dim=Q, num_inducing=num_inducing, kernel=k, initx="PCA_concat", initz='permute', **kw)
m['.*noise'] = [Y.var()/40. for Y in Ylist]
if optimize:
print "Optimizing Model:"
m.optimize(messages=verbose, max_iters=8e3, gtol=.1)
if plot:
m.X.plot("MRD Latent Space 1D")
m.plot_scales("MRD Scales")
return m
def mrd_simulation_missing_data(optimize=True, verbose=True, plot=True, plot_sim=True, **kw):
from GPy import kern
from GPy.models import MRD

View file

@ -389,9 +389,9 @@ def update_gradients(model, mpi_comm=None):
model.Z.gradient += model.kern.gradients_X(dL_dKmm, model.Z)
# Update Log-likelihood
KL_div = model.variational_prior.KL_divergence(X)
KL_div = model.variational_prior.KL_divergence(X, N=model.Y.shape[0])
# update for the KL divergence
model.variational_prior.update_gradients_KL(X)
model.variational_prior.update_gradients_KL(X, N=model.Y.shape[0])
if mpi_comm != None:
KL_div_all = np.array(KL_div)

View file

@ -34,11 +34,16 @@ class RBF(Stationary):
def __getstate__(self):
dc = super(RBF, self).__getstate__()
if self.useGPU:
dc['psicomp'] = PSICOMP_RBF()
# if self.useGPU:
# dc['psicomp'] = PSICOMP_RBF()
# dc['useGPU'] = False
return dc
def __setstate__(self, state):
from ...util.gpu_init import gpu_initialized
if state['useGPU'] and not gpu_initialized:
state['useGPU'] = False
state['psicomp'] = PSICOMP_RBF()
return super(RBF, self).__setstate__(state)
def spectrum(self, omega):

View file

@ -81,7 +81,7 @@ class BayesianGPLVM(SparseGP):
self.link_parameter(self.X, index=0)
if mpi_comm != None:
from ..util.mpi import divide_data
from ..util.parallel import divide_data
N_start, N_end, N_list = divide_data(Y.shape[0], mpi_comm)
self.N_range = (N_start, N_end)
self.N_list = np.array(N_list)

View file

@ -24,19 +24,13 @@ class SSGPLVM(SparseGP_MPI):
"""
def __init__(self, Y, input_dim, X=None, X_variance=None, Gamma=None, init='PCA', num_inducing=10,
Z=None, kernel=None, inference_method=None, likelihood=None, name='Spike_and_Slab GPLVM', group_spike=False, mpi_comm=None, pi=None, learnPi=True,normalizer=False, **kwargs):
Z=None, kernel=None, inference_method=None, likelihood=None, name='Spike_and_Slab GPLVM',
group_spike=False, mpi_comm=None, pi=None, learnPi=True,normalizer=False, variational_prior=None,**kwargs):
self.group_spike = group_spike
if X == None:
from ..util.initialization import initialize_latent
X, fracs = initialize_latent(init, input_dim, Y)
else:
fracs = np.ones(input_dim)
self.init = init
X = self._init_X(input_dim, Y, X, X_variance, Gamma, init)
X, fracs = self._init_X(input_dim, Y, X, X_variance, Gamma, init)
if Z is None:
Z = np.random.permutation(X.mean.copy())[:num_inducing]
@ -46,7 +40,7 @@ class SSGPLVM(SparseGP_MPI):
likelihood = Gaussian()
if kernel is None:
kernel = kern.RBF(input_dim, lengthscale=fracs, ARD=True) # + kern.white(input_dim)
kernel = kern.RBF(input_dim, lengthscale=1./fracs, ARD=True) # + kern.white(input_dim)
if kernel.useGPU:
kernel.psicomp = PSICOMP_SSRBF_GPU()
@ -56,7 +50,11 @@ class SSGPLVM(SparseGP_MPI):
if pi is None:
pi = np.empty((input_dim))
pi[:] = 0.5
self.variational_prior = SpikeAndSlabPrior(pi=pi,learnPi=learnPi, group_spike=group_spike) # the prior probability of the latent binary variable b
if variational_prior is None:
self.variational_prior = SpikeAndSlabPrior(pi=pi,learnPi=learnPi, group_spike=group_spike) # the prior probability of the latent binary variable b
else:
self.variational_prior = variational_prior
super(SSGPLVM,self).__init__(X, Y, Z, kernel, likelihood, variational_prior=self.variational_prior, inference_method=inference_method, name=name, mpi_comm=mpi_comm, normalizer=normalizer, **kwargs)
# self.X.unfix()
@ -64,14 +62,14 @@ class SSGPLVM(SparseGP_MPI):
if self.group_spike:
[self.X.gamma[:,i].tie_together() for i in xrange(self.X.gamma.shape[1])] # Tie columns together
def _init_X(self, input_dim, Y=None, X=None, X_variance=None, Gamma=None, init='PCA'):
if X == None:
from ..util.initialization import initialize_latent
X, fracs = initialize_latent(init, input_dim, Y)
else:
fracs = np.ones(input_dim)
if X_variance is None: # The variance of the variational approximation (S)
X_variance = np.random.uniform(0,.1,X.shape)
@ -83,9 +81,8 @@ class SSGPLVM(SparseGP_MPI):
else:
gamma = Gamma.copy()
return SpikeAndSlabPosterior(X, X_variance, gamma)
return SpikeAndSlabPosterior(X, X_variance, gamma), fracs
def set_X_gradients(self, X, X_grad):
"""Set the gradients of the posterior distribution of X in its specific form."""
X.mean.gradient, X.variance.gradient, X.binary_prob.gradient = X_grad
@ -126,3 +123,4 @@ class SSGPLVM(SparseGP_MPI):
from ..inference.latent_function_inference.inference_X import inference_newX
return inference_newX(self, Y_new, optimize=optimize)

View file

@ -2,33 +2,151 @@
The Maniforld Relevance Determination model with the spike-and-slab prior
"""
import numpy as np
from ..core import Model
from .ss_gplvm import SSGPLVM
from ..core.parameterization.variational import SpikeAndSlabPrior
from ..util.misc import param_to_array
from ..kern import RBF
class SSMRD(Model):
def __init__(self, Ylist, input_dim, X=None, X_variance=None,
initx = 'PCA', initz = 'permute',
num_inducing=10, Z=None, kernel=None,
inference_method=None, likelihoods=None, name='ss_mrd', Ynames=None):
def __init__(self, Ylist, input_dim, X=None, X_variance=None, Gammas=None, initx = 'PCA_concat', initz = 'permute',
num_inducing=10, Zs=None, kernel=None, inference_method=None, likelihoods=None,
pi=0.5, name='ss_mrd', Ynames=None):
super(SSMRD, self).__init__(name)
self.updates = False
self.models = [SSGPLVM(y, input_dim, X=X, X_variance=X_variance, num_inducing=num_inducing,Z=Z,init=initx,
kernel=kernel.copy() if kernel else None,inference_method=inference_method,likelihood=likelihoods,
# initialize X for individual models
X, X_variance, Gammas, fracs = self._init_X(Ylist, input_dim, X, X_variance, Gammas, initx)
if kernel is None:
kernel = RBF(input_dim, lengthscale=1./fracs, ARD=True)
if Zs is None:
Zs = [None]* len(Ylist)
if likelihoods is None:
likelihoods = [None]* len(Ylist)
self.var_priors = [VarPrior_SSMRD(nModels=len(Ylist),pi=pi,learnPi=False, group_spike=True) for i in xrange(len(Ylist))]
self.models = [SSGPLVM(y, input_dim, X=X, X_variance=X_variance, Gamma=Gammas[i], num_inducing=num_inducing,Z=Zs[i],learnPi=False, group_spike=True,
kernel=kernel.copy(),inference_method=inference_method,likelihood=likelihoods[i], variational_prior=self.var_priors[i],
name='model_'+str(i)) for i,y in enumerate(Ylist)]
self.add_parameters(*(self.models))
self.link_parameters(*(self.models))
[[[self.models[m].X.mean[i,j:j+1].tie('mean_'+str(i)+'_'+str(j)) for m in xrange(len(self.models))] for j in xrange(self.models[0].X.mean.shape[1])]
for i in xrange(self.models[0].X.mean.shape[0])]
[[[self.models[m].X.variance[i,j:j+1].tie('var_'+str(i)+'_'+str(j)) for m in xrange(len(self.models))] for j in xrange(self.models[0].X.variance.shape[1])]
for i in xrange(self.models[0].X.variance.shape[0])]
self.updates = True
self.models[0].X.mean.tie_vector(*[m.X.mean for m in self.models[1:]])
self.models[0].X.variance.tie_vector(*[m.X.variance for m in self.models[1:]])
self.models[0].kern.tie_vector(*[m.kern for m in self.models[1:]])
def parameters_changed(self):
varp_list = [m.X for m in self.models]
[vp._update_inernal(varp_list) for vp in self.var_priors]
super(SSMRD, self).parameters_changed()
self._log_marginal_likelihood = sum([m._log_marginal_likelihood for m in self.models])
def log_likelihood(self):
return self._log_marginal_likelihood
return self._log_marginal_likelihood
def _init_X(self, Ylist, input_dim, X=None, X_variance=None, Gammas=None, initx='PCA_concat'):
# Divide latent dimensions
idx = np.empty((input_dim,),dtype=np.int)
residue = (input_dim)%(len(Ylist))
for i in xrange(len(Ylist)):
if i < residue:
size = input_dim/len(Ylist)+1
idx[i*size:(i+1)*size] = i
else:
size = input_dim/len(Ylist)
idx[i*size+residue:(i+1)*size+residue] = i
if X is None:
X = np.empty((Ylist[0].shape[0],input_dim))
fracs = np.empty((input_dim,))
from ..util.initialization import initialize_latent
for i in xrange(len(Ylist)):
Y = Ylist[i]
dim = (idx==i).sum()
if dim>0:
x, fr = initialize_latent('PCA', dim, Y)
X[:,idx==i] = x
fracs[idx==i] = fr
else:
fracs = np.ones(input_dim)
if X_variance is None: # The variance of the variational approximation (S)
X_variance = np.random.uniform(0,.1,X.shape)
if Gammas is None:
Gammas = []
for x in X:
gamma = np.empty_like(X) # The posterior probabilities of the binary variable in the variational approximation
gamma[:] = 0.5 + 0.1 * np.random.randn(X.shape[0], input_dim)
gamma[gamma>1.-1e-9] = 1.-1e-9
gamma[gamma<1e-9] = 1e-9
Gammas.append(gamma)
return X, X_variance, Gammas, fracs
class VarPrior_SSMRD(SpikeAndSlabPrior):
def __init__(self, nModels, pi=None, learnPi=False, group_spike=True, variance = 1.0, name='SSMRDPrior', **kw):
self.nModels = nModels
self._b_prob_all = 0.5
super(VarPrior_SSMRD, self).__init__(pi=pi,learnPi=learnPi,group_spike=group_spike,variance=variance, name=name, **kw)
def _update_inernal(self, varp_list):
"""Make an update of the internal status by gathering the variational posteriors for all the individual models."""
# The probability for the binary variable for the same latent dimension of any of the models is on.
if self.group_spike:
self._b_prob_all = 1.-param_to_array(varp_list[0].binary_prob[0])
[np.multiply(self._b_prob_all, 1.-vp.binary_prob[0], self._b_prob_all) for vp in varp_list[1:]]
else:
self._b_prob_all = 1.-param_to_array(varp_list[0].binary_prob)
[np.multiply(self._b_prob_all, 1.-vp.binary_prob, self._b_prob_all) for vp in varp_list[1:]]
def KL_divergence(self, variational_posterior, N=None):
mu = variational_posterior.mean
S = variational_posterior.variance
gamma = variational_posterior.binary_prob
if len(self.pi.shape)==2:
idx = np.unique(gamma._raveled_index()/gamma.shape[-1])
pi = self.pi[idx]
else:
pi = self.pi
ml = self._highest_parent_
if hasattr(ml, 'models'):
varp_list = [m.X for m in ml.models]
[vp._update_inernal(varp_list) for vp in ml.var_priors]
var_mean = np.square(mu)/self.variance
var_S = (S/self.variance - np.log(S))
if self.group_spike:
assert N is not None
var_gamma = ((gamma*np.log(gamma/pi)).sum()+((1-gamma)*np.log((1-gamma)/(1-pi))).sum())/N
else:
var_gamma = (gamma*np.log(gamma/pi)).sum()+((1-gamma)*np.log((1-gamma)/(1-pi))).sum()
return var_gamma +((1.-self._b_prob_all)*(np.log(self.variance)-1. +var_mean + var_S)).sum()/(2.*self.nModels)
def update_gradients_KL(self, variational_posterior, N=None):
mu = variational_posterior.mean
S = variational_posterior.variance
gamma = variational_posterior.binary_prob
if len(self.pi.shape)==2:
idx = np.unique(gamma._raveled_index()/gamma.shape[-1])
pi = self.pi[idx]
else:
pi = self.pi
if self.group_spike:
assert N is not None
tmp = self._b_prob_all/(1.-gamma[0])
gamma.gradient -= np.log((1-pi)/pi*gamma/(1.-gamma))/N +tmp*((np.square(mu)+S)/self.variance-np.log(S)+np.log(self.variance)-1.)/2.
else:
gamma.gradient -= np.log((1-pi)/pi*gamma/(1.-gamma))+((np.square(mu)+S)/self.variance-np.log(S)+np.log(self.variance)-1.)/2.
mu.gradient -= (1.-self._b_prob_all)*mu/(self.variance*self.nModels)
S.gradient -= (1./self.variance - 1./S) * (1.-self._b_prob_all) /(2.*self.nModels)
if self.learnPi:
raise 'Not Supported!'

View file

@ -19,5 +19,8 @@ def initialize_latent(init, input_dim, Y):
Xr -= Xr.mean(0)
Xr /= Xr.std(0)
fracs = np.ones((input_dim,))
fracs[:var.shape[0]] = var/var.max()
return Xr, var/var.max()
return Xr, fracs

View file

@ -1,35 +0,0 @@
"""
The tools for mpi
"""
try:
import numpy as np
from mpi4py import MPI
numpy_to_MPI_typemap = {
np.dtype(np.float64) : MPI.DOUBLE,
np.dtype(np.float32) : MPI.FLOAT,
np.dtype(np.int) : MPI.INT,
np.dtype(np.int8) : MPI.CHAR,
np.dtype(np.uint8) : MPI.UNSIGNED_CHAR,
np.dtype(np.int32) : MPI.INT,
np.dtype(np.uint32) : MPI.UNSIGNED_INT,
}
except:
pass
def divide_data(datanum, comm):
residue = (datanum)%comm.size
datanum_list = np.empty((comm.size),dtype=np.int32)
for i in xrange(comm.size):
if i<residue:
datanum_list[i] = int(datanum/comm.size)+1
else:
datanum_list[i] = int(datanum/comm.size)
if comm.rank<residue:
size = datanum/comm.size+1
offset = size*comm.rank
else:
size = datanum/comm.size
offset = size*comm.rank+residue
return offset, offset+size, datanum_list

View file

@ -1,12 +1,38 @@
"""
The module of tools for parallelization (MPI)
"""
try:
import numpy as np
from mpi4py import MPI
numpy_to_MPI_typemap = {
np.dtype(np.float64) : MPI.DOUBLE,
np.dtype(np.float32) : MPI.FLOAT,
np.dtype(np.int) : MPI.INT,
np.dtype(np.int8) : MPI.CHAR,
np.dtype(np.uint8) : MPI.UNSIGNED_CHAR,
np.dtype(np.int32) : MPI.INT,
np.dtype(np.uint32) : MPI.UNSIGNED_INT,
}
except:
pass
def divide_data(datanum, comm):
residue = (datanum)%comm.size
datanum_list = np.empty((comm.size),dtype=np.int32)
for i in xrange(comm.size):
if i<residue:
datanum_list[i] = int(datanum/comm.size)+1
else:
datanum_list[i] = int(datanum/comm.size)
if comm.rank<residue:
size = datanum/comm.size+1
offset = size*comm.rank
else:
size = datanum/comm.size
offset = size*comm.rank+residue
return offset, offset+size, datanum_list
def get_id_within_node(comm=MPI.COMM_WORLD):
rank = comm.rank
nodename = MPI.Get_processor_name()