mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-05-30 14:35:15 +02:00
bug fix for tie: merge tie
This commit is contained in:
parent
31b451494d
commit
84b096d20c
12 changed files with 230 additions and 89 deletions
|
|
@ -112,6 +112,8 @@ class Tie(Parameterized):
|
|||
def _sync_constraints_p(p, tieparam, read, cons):
|
||||
if p.tie is not None:
|
||||
p = p._original_
|
||||
if p is tieparam:
|
||||
return
|
||||
labels = np.unique(p.tie)
|
||||
labels = labels[labels>0]
|
||||
for l in labels:
|
||||
|
|
@ -146,7 +148,6 @@ class Tie(Parameterized):
|
|||
if not p.has_parent():
|
||||
p.ties = Tie()
|
||||
p.link_parameter(p.ties, -1)
|
||||
p.add_observer(p.ties, p.ties._parameters_changed_notification, priority=-500)
|
||||
|
||||
p.update_model(False)
|
||||
labels = p.ties._get_labels([p])
|
||||
|
|
@ -157,6 +158,7 @@ class Tie(Parameterized):
|
|||
p.ties._sync_val([p],toTiedParam=True)
|
||||
p.ties._sync_constraints([p], toTiedParam=True)
|
||||
p.ties._update_label_buf()
|
||||
p.add_observer(p.ties, p.ties._parameters_changed_notification, priority=-500)
|
||||
p.update_model(True)
|
||||
|
||||
def mergeTies(self, p):
|
||||
|
|
@ -168,6 +170,7 @@ class Tie(Parameterized):
|
|||
self.tied_param[-p.ties.tied_param.size:] = p.ties.tied_param
|
||||
pairs = zip(self.tied_param.tie,tie_labels)
|
||||
self._replace_labels(p, pairs)
|
||||
self._sync_constraints([self._parent_], toTiedParam=True)
|
||||
p.remove_observer(p.ties)
|
||||
p.unlink_parameter(p.ties)
|
||||
del p.ties
|
||||
|
|
@ -184,11 +187,13 @@ class Tie(Parameterized):
|
|||
labels = self._get_labels([p])
|
||||
labels = labels[labels>0]
|
||||
if len(labels)>0:
|
||||
p._expand_tie_param(len(labels))
|
||||
idx = np.in1d(self.tied_param.tie,labels)
|
||||
p.tied_param[:] = self.tied_param[idx]
|
||||
p.tied_param.tie[:] = self.tied_param.tie[idx]
|
||||
Tie.recoverTies(p)
|
||||
# p._expand_tie_param(len(labels))
|
||||
# idx = np.in1d(self.tied_param.tie,labels)
|
||||
# p.tied_param[:] = self.tied_param[idx]
|
||||
# p.tied_param.tie[:] = self.tied_param.tie[idx]
|
||||
self._remove_unnecessary_ties()
|
||||
self._sync_constraints([self._parent_], toTiedParam=True)
|
||||
self._update_label_buf()
|
||||
p.ties._update_label_buf()
|
||||
self.update_model(True)
|
||||
|
|
|
|||
|
|
@ -45,8 +45,7 @@ class SpikeAndSlabPrior(VariationalPrior):
|
|||
self.pi = Param('Pi', pi, __fixed__)
|
||||
self.link_parameter(self.pi)
|
||||
|
||||
|
||||
def KL_divergence(self, variational_posterior):
|
||||
def KL_divergence(self, variational_posterior, N=None):
|
||||
mu = variational_posterior.mean
|
||||
S = variational_posterior.variance
|
||||
gamma = variational_posterior.binary_prob
|
||||
|
|
@ -58,14 +57,14 @@ class SpikeAndSlabPrior(VariationalPrior):
|
|||
|
||||
var_mean = np.square(mu)/self.variance
|
||||
var_S = (S/self.variance - np.log(S))
|
||||
# TODO: sovle group_spike for parallelization
|
||||
if self.group_spike:
|
||||
var_gamma = (gamma*np.log(gamma/pi)).sum()/gamma.shape[0]+((1-gamma)*np.log((1-gamma)/(1-pi))).sum()/gamma.shape[0]
|
||||
assert N is not None
|
||||
var_gamma = ((gamma*np.log(gamma/pi)).sum()+((1-gamma)*np.log((1-gamma)/(1-pi))).sum())/N
|
||||
else:
|
||||
var_gamma = (gamma*np.log(gamma/pi)).sum()+((1-gamma)*np.log((1-gamma)/(1-pi))).sum()
|
||||
return var_gamma+ (gamma* (np.log(self.variance)-1. +var_mean + var_S)).sum()/2.
|
||||
|
||||
def update_gradients_KL(self, variational_posterior):
|
||||
def update_gradients_KL(self, variational_posterior, N=None):
|
||||
mu = variational_posterior.mean
|
||||
S = variational_posterior.variance
|
||||
gamma = variational_posterior.binary_prob
|
||||
|
|
@ -76,7 +75,8 @@ class SpikeAndSlabPrior(VariationalPrior):
|
|||
pi = self.pi
|
||||
|
||||
if self.group_spike:
|
||||
gamma.gradient -= np.log((1-pi)/pi*gamma/(1.-gamma))/gamma.shape[0]+((np.square(mu)+S)/self.variance-np.log(S)+np.log(self.variance)-1.)/2.
|
||||
assert N is not None
|
||||
gamma.gradient -= np.log((1-pi)/pi*gamma/(1.-gamma))/N+((np.square(mu)+S)/self.variance-np.log(S)+np.log(self.variance)-1.)/2.
|
||||
else:
|
||||
gamma.gradient -= np.log((1-pi)/pi*gamma/(1.-gamma))+((np.square(mu)+S)/self.variance-np.log(S)+np.log(self.variance)-1.)/2.
|
||||
mu.gradient -= gamma*mu/self.variance
|
||||
|
|
@ -84,17 +84,17 @@ class SpikeAndSlabPrior(VariationalPrior):
|
|||
if self.learnPi:
|
||||
if len(self.pi)==1:
|
||||
if self.group_spike:
|
||||
self.pi.gradient = (gamma/self.pi - (1.-gamma)/(1.-self.pi)).sum()/gamma.shape[0]
|
||||
self.pi.gradient = (gamma/self.pi - (1.-gamma)/(1.-self.pi)).sum()/N
|
||||
else:
|
||||
self.pi.gradient = (gamma/self.pi - (1.-gamma)/(1.-self.pi)).sum()
|
||||
elif len(self.pi.shape)==1:
|
||||
if self.group_spike:
|
||||
self.pi.gradient = (gamma/self.pi - (1.-gamma)/(1.-self.pi)).sum(axis=0)/gamma.shape[0]
|
||||
self.pi.gradient = (gamma/self.pi - (1.-gamma)/(1.-self.pi)).sum(axis=0)/N
|
||||
else:
|
||||
self.pi.gradient = (gamma/self.pi - (1.-gamma)/(1.-self.pi)).sum(axis=0)
|
||||
else:
|
||||
if self.group_spike:
|
||||
self.pi[idx].gradient = (gamma/self.pi[idx] - (1.-gamma)/(1.-self.pi[idx]))/gamma.shape[0]
|
||||
self.pi[idx].gradient = (gamma/self.pi[idx] - (1.-gamma)/(1.-self.pi[idx]))/N
|
||||
else:
|
||||
self.pi[idx].gradient = (gamma/self.pi[idx] - (1.-gamma)/(1.-self.pi[idx]))
|
||||
|
||||
|
|
|
|||
|
|
@ -36,6 +36,7 @@ class SparseGP_MPI(SparseGP):
|
|||
|
||||
def __init__(self, X, Y, Z, kernel, likelihood, variational_prior=None, inference_method=None, name='sparse gp mpi', Y_metadata=None, mpi_comm=None, normalizer=False):
|
||||
self._IN_OPTIMIZATION_ = False
|
||||
self.mpi_comm = mpi_comm
|
||||
if mpi_comm != None:
|
||||
if inference_method is None:
|
||||
inference_method = VarDTC_minibatch(mpi_comm=mpi_comm)
|
||||
|
|
@ -49,10 +50,9 @@ class SparseGP_MPI(SparseGP):
|
|||
self.link_parameter(variational_prior)
|
||||
# self.X.fix()
|
||||
|
||||
self.mpi_comm = mpi_comm
|
||||
# Manage the data (Y) division
|
||||
if mpi_comm != None:
|
||||
from ..util.mpi import divide_data
|
||||
from ..util.parallel import divide_data
|
||||
N_start, N_end, N_list = divide_data(Y.shape[0], mpi_comm)
|
||||
self.N_range = (N_start, N_end)
|
||||
self.N_list = np.array(N_list)
|
||||
|
|
|
|||
|
|
@ -428,6 +428,27 @@ def mrd_simulation(optimize=True, verbose=True, plot=True, plot_sim=True, **kw):
|
|||
m.plot_scales("MRD Scales")
|
||||
return m
|
||||
|
||||
def ssmrd_simulation(optimize=True, verbose=True, plot=True, plot_sim=True, **kw):
|
||||
from GPy import kern
|
||||
from GPy.models import SSMRD
|
||||
|
||||
D1, D2, D3, N, num_inducing, Q = 60, 20, 36, 60, 6, 5
|
||||
_, _, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim)
|
||||
|
||||
#Ylist = [Ylist[0]]
|
||||
k = kern.RBF(Q, ARD=True)
|
||||
m = SSMRD(Ylist, input_dim=Q, num_inducing=num_inducing, kernel=k, initx="PCA_concat", initz='permute', **kw)
|
||||
|
||||
m['.*noise'] = [Y.var()/40. for Y in Ylist]
|
||||
|
||||
if optimize:
|
||||
print "Optimizing Model:"
|
||||
m.optimize(messages=verbose, max_iters=8e3, gtol=.1)
|
||||
if plot:
|
||||
m.X.plot("MRD Latent Space 1D")
|
||||
m.plot_scales("MRD Scales")
|
||||
return m
|
||||
|
||||
def mrd_simulation_missing_data(optimize=True, verbose=True, plot=True, plot_sim=True, **kw):
|
||||
from GPy import kern
|
||||
from GPy.models import MRD
|
||||
|
|
|
|||
|
|
@ -389,9 +389,9 @@ def update_gradients(model, mpi_comm=None):
|
|||
model.Z.gradient += model.kern.gradients_X(dL_dKmm, model.Z)
|
||||
|
||||
# Update Log-likelihood
|
||||
KL_div = model.variational_prior.KL_divergence(X)
|
||||
KL_div = model.variational_prior.KL_divergence(X, N=model.Y.shape[0])
|
||||
# update for the KL divergence
|
||||
model.variational_prior.update_gradients_KL(X)
|
||||
model.variational_prior.update_gradients_KL(X, N=model.Y.shape[0])
|
||||
|
||||
if mpi_comm != None:
|
||||
KL_div_all = np.array(KL_div)
|
||||
|
|
|
|||
|
|
@ -34,11 +34,16 @@ class RBF(Stationary):
|
|||
|
||||
def __getstate__(self):
|
||||
dc = super(RBF, self).__getstate__()
|
||||
if self.useGPU:
|
||||
dc['psicomp'] = PSICOMP_RBF()
|
||||
# if self.useGPU:
|
||||
# dc['psicomp'] = PSICOMP_RBF()
|
||||
# dc['useGPU'] = False
|
||||
return dc
|
||||
|
||||
def __setstate__(self, state):
|
||||
from ...util.gpu_init import gpu_initialized
|
||||
if state['useGPU'] and not gpu_initialized:
|
||||
state['useGPU'] = False
|
||||
state['psicomp'] = PSICOMP_RBF()
|
||||
return super(RBF, self).__setstate__(state)
|
||||
|
||||
def spectrum(self, omega):
|
||||
|
|
|
|||
|
|
@ -81,7 +81,7 @@ class BayesianGPLVM(SparseGP):
|
|||
self.link_parameter(self.X, index=0)
|
||||
|
||||
if mpi_comm != None:
|
||||
from ..util.mpi import divide_data
|
||||
from ..util.parallel import divide_data
|
||||
N_start, N_end, N_list = divide_data(Y.shape[0], mpi_comm)
|
||||
self.N_range = (N_start, N_end)
|
||||
self.N_list = np.array(N_list)
|
||||
|
|
|
|||
|
|
@ -24,19 +24,13 @@ class SSGPLVM(SparseGP_MPI):
|
|||
|
||||
"""
|
||||
def __init__(self, Y, input_dim, X=None, X_variance=None, Gamma=None, init='PCA', num_inducing=10,
|
||||
Z=None, kernel=None, inference_method=None, likelihood=None, name='Spike_and_Slab GPLVM', group_spike=False, mpi_comm=None, pi=None, learnPi=True,normalizer=False, **kwargs):
|
||||
Z=None, kernel=None, inference_method=None, likelihood=None, name='Spike_and_Slab GPLVM',
|
||||
group_spike=False, mpi_comm=None, pi=None, learnPi=True,normalizer=False, variational_prior=None,**kwargs):
|
||||
|
||||
self.group_spike = group_spike
|
||||
|
||||
if X == None:
|
||||
from ..util.initialization import initialize_latent
|
||||
X, fracs = initialize_latent(init, input_dim, Y)
|
||||
else:
|
||||
fracs = np.ones(input_dim)
|
||||
|
||||
self.init = init
|
||||
|
||||
X = self._init_X(input_dim, Y, X, X_variance, Gamma, init)
|
||||
X, fracs = self._init_X(input_dim, Y, X, X_variance, Gamma, init)
|
||||
|
||||
if Z is None:
|
||||
Z = np.random.permutation(X.mean.copy())[:num_inducing]
|
||||
|
|
@ -46,7 +40,7 @@ class SSGPLVM(SparseGP_MPI):
|
|||
likelihood = Gaussian()
|
||||
|
||||
if kernel is None:
|
||||
kernel = kern.RBF(input_dim, lengthscale=fracs, ARD=True) # + kern.white(input_dim)
|
||||
kernel = kern.RBF(input_dim, lengthscale=1./fracs, ARD=True) # + kern.white(input_dim)
|
||||
if kernel.useGPU:
|
||||
kernel.psicomp = PSICOMP_SSRBF_GPU()
|
||||
|
||||
|
|
@ -56,7 +50,11 @@ class SSGPLVM(SparseGP_MPI):
|
|||
if pi is None:
|
||||
pi = np.empty((input_dim))
|
||||
pi[:] = 0.5
|
||||
self.variational_prior = SpikeAndSlabPrior(pi=pi,learnPi=learnPi, group_spike=group_spike) # the prior probability of the latent binary variable b
|
||||
|
||||
if variational_prior is None:
|
||||
self.variational_prior = SpikeAndSlabPrior(pi=pi,learnPi=learnPi, group_spike=group_spike) # the prior probability of the latent binary variable b
|
||||
else:
|
||||
self.variational_prior = variational_prior
|
||||
|
||||
super(SSGPLVM,self).__init__(X, Y, Z, kernel, likelihood, variational_prior=self.variational_prior, inference_method=inference_method, name=name, mpi_comm=mpi_comm, normalizer=normalizer, **kwargs)
|
||||
# self.X.unfix()
|
||||
|
|
@ -64,14 +62,14 @@ class SSGPLVM(SparseGP_MPI):
|
|||
|
||||
if self.group_spike:
|
||||
[self.X.gamma[:,i].tie_together() for i in xrange(self.X.gamma.shape[1])] # Tie columns together
|
||||
|
||||
|
||||
def _init_X(self, input_dim, Y=None, X=None, X_variance=None, Gamma=None, init='PCA'):
|
||||
if X == None:
|
||||
from ..util.initialization import initialize_latent
|
||||
X, fracs = initialize_latent(init, input_dim, Y)
|
||||
else:
|
||||
fracs = np.ones(input_dim)
|
||||
|
||||
|
||||
if X_variance is None: # The variance of the variational approximation (S)
|
||||
X_variance = np.random.uniform(0,.1,X.shape)
|
||||
|
||||
|
|
@ -83,9 +81,8 @@ class SSGPLVM(SparseGP_MPI):
|
|||
else:
|
||||
gamma = Gamma.copy()
|
||||
|
||||
return SpikeAndSlabPosterior(X, X_variance, gamma)
|
||||
|
||||
|
||||
return SpikeAndSlabPosterior(X, X_variance, gamma), fracs
|
||||
|
||||
def set_X_gradients(self, X, X_grad):
|
||||
"""Set the gradients of the posterior distribution of X in its specific form."""
|
||||
X.mean.gradient, X.variance.gradient, X.binary_prob.gradient = X_grad
|
||||
|
|
@ -126,3 +123,4 @@ class SSGPLVM(SparseGP_MPI):
|
|||
from ..inference.latent_function_inference.inference_X import inference_newX
|
||||
return inference_newX(self, Y_new, optimize=optimize)
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -2,33 +2,151 @@
|
|||
The Maniforld Relevance Determination model with the spike-and-slab prior
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
from ..core import Model
|
||||
from .ss_gplvm import SSGPLVM
|
||||
from ..core.parameterization.variational import SpikeAndSlabPrior
|
||||
from ..util.misc import param_to_array
|
||||
from ..kern import RBF
|
||||
|
||||
class SSMRD(Model):
|
||||
|
||||
def __init__(self, Ylist, input_dim, X=None, X_variance=None,
|
||||
initx = 'PCA', initz = 'permute',
|
||||
num_inducing=10, Z=None, kernel=None,
|
||||
inference_method=None, likelihoods=None, name='ss_mrd', Ynames=None):
|
||||
def __init__(self, Ylist, input_dim, X=None, X_variance=None, Gammas=None, initx = 'PCA_concat', initz = 'permute',
|
||||
num_inducing=10, Zs=None, kernel=None, inference_method=None, likelihoods=None,
|
||||
pi=0.5, name='ss_mrd', Ynames=None):
|
||||
super(SSMRD, self).__init__(name)
|
||||
|
||||
self.updates = False
|
||||
self.models = [SSGPLVM(y, input_dim, X=X, X_variance=X_variance, num_inducing=num_inducing,Z=Z,init=initx,
|
||||
kernel=kernel.copy() if kernel else None,inference_method=inference_method,likelihood=likelihoods,
|
||||
# initialize X for individual models
|
||||
X, X_variance, Gammas, fracs = self._init_X(Ylist, input_dim, X, X_variance, Gammas, initx)
|
||||
|
||||
if kernel is None:
|
||||
kernel = RBF(input_dim, lengthscale=1./fracs, ARD=True)
|
||||
if Zs is None:
|
||||
Zs = [None]* len(Ylist)
|
||||
if likelihoods is None:
|
||||
likelihoods = [None]* len(Ylist)
|
||||
|
||||
self.var_priors = [VarPrior_SSMRD(nModels=len(Ylist),pi=pi,learnPi=False, group_spike=True) for i in xrange(len(Ylist))]
|
||||
self.models = [SSGPLVM(y, input_dim, X=X, X_variance=X_variance, Gamma=Gammas[i], num_inducing=num_inducing,Z=Zs[i],learnPi=False, group_spike=True,
|
||||
kernel=kernel.copy(),inference_method=inference_method,likelihood=likelihoods[i], variational_prior=self.var_priors[i],
|
||||
name='model_'+str(i)) for i,y in enumerate(Ylist)]
|
||||
self.add_parameters(*(self.models))
|
||||
self.link_parameters(*(self.models))
|
||||
|
||||
[[[self.models[m].X.mean[i,j:j+1].tie('mean_'+str(i)+'_'+str(j)) for m in xrange(len(self.models))] for j in xrange(self.models[0].X.mean.shape[1])]
|
||||
for i in xrange(self.models[0].X.mean.shape[0])]
|
||||
[[[self.models[m].X.variance[i,j:j+1].tie('var_'+str(i)+'_'+str(j)) for m in xrange(len(self.models))] for j in xrange(self.models[0].X.variance.shape[1])]
|
||||
for i in xrange(self.models[0].X.variance.shape[0])]
|
||||
|
||||
self.updates = True
|
||||
self.models[0].X.mean.tie_vector(*[m.X.mean for m in self.models[1:]])
|
||||
self.models[0].X.variance.tie_vector(*[m.X.variance for m in self.models[1:]])
|
||||
self.models[0].kern.tie_vector(*[m.kern for m in self.models[1:]])
|
||||
|
||||
def parameters_changed(self):
|
||||
varp_list = [m.X for m in self.models]
|
||||
[vp._update_inernal(varp_list) for vp in self.var_priors]
|
||||
super(SSMRD, self).parameters_changed()
|
||||
self._log_marginal_likelihood = sum([m._log_marginal_likelihood for m in self.models])
|
||||
|
||||
def log_likelihood(self):
|
||||
return self._log_marginal_likelihood
|
||||
return self._log_marginal_likelihood
|
||||
|
||||
def _init_X(self, Ylist, input_dim, X=None, X_variance=None, Gammas=None, initx='PCA_concat'):
|
||||
|
||||
# Divide latent dimensions
|
||||
idx = np.empty((input_dim,),dtype=np.int)
|
||||
residue = (input_dim)%(len(Ylist))
|
||||
for i in xrange(len(Ylist)):
|
||||
if i < residue:
|
||||
size = input_dim/len(Ylist)+1
|
||||
idx[i*size:(i+1)*size] = i
|
||||
else:
|
||||
size = input_dim/len(Ylist)
|
||||
idx[i*size+residue:(i+1)*size+residue] = i
|
||||
|
||||
if X is None:
|
||||
X = np.empty((Ylist[0].shape[0],input_dim))
|
||||
fracs = np.empty((input_dim,))
|
||||
from ..util.initialization import initialize_latent
|
||||
for i in xrange(len(Ylist)):
|
||||
Y = Ylist[i]
|
||||
dim = (idx==i).sum()
|
||||
if dim>0:
|
||||
x, fr = initialize_latent('PCA', dim, Y)
|
||||
X[:,idx==i] = x
|
||||
fracs[idx==i] = fr
|
||||
else:
|
||||
fracs = np.ones(input_dim)
|
||||
|
||||
if X_variance is None: # The variance of the variational approximation (S)
|
||||
X_variance = np.random.uniform(0,.1,X.shape)
|
||||
|
||||
if Gammas is None:
|
||||
Gammas = []
|
||||
for x in X:
|
||||
gamma = np.empty_like(X) # The posterior probabilities of the binary variable in the variational approximation
|
||||
gamma[:] = 0.5 + 0.1 * np.random.randn(X.shape[0], input_dim)
|
||||
gamma[gamma>1.-1e-9] = 1.-1e-9
|
||||
gamma[gamma<1e-9] = 1e-9
|
||||
Gammas.append(gamma)
|
||||
return X, X_variance, Gammas, fracs
|
||||
|
||||
|
||||
|
||||
|
||||
class VarPrior_SSMRD(SpikeAndSlabPrior):
|
||||
def __init__(self, nModels, pi=None, learnPi=False, group_spike=True, variance = 1.0, name='SSMRDPrior', **kw):
|
||||
self.nModels = nModels
|
||||
self._b_prob_all = 0.5
|
||||
super(VarPrior_SSMRD, self).__init__(pi=pi,learnPi=learnPi,group_spike=group_spike,variance=variance, name=name, **kw)
|
||||
|
||||
def _update_inernal(self, varp_list):
|
||||
"""Make an update of the internal status by gathering the variational posteriors for all the individual models."""
|
||||
# The probability for the binary variable for the same latent dimension of any of the models is on.
|
||||
if self.group_spike:
|
||||
self._b_prob_all = 1.-param_to_array(varp_list[0].binary_prob[0])
|
||||
[np.multiply(self._b_prob_all, 1.-vp.binary_prob[0], self._b_prob_all) for vp in varp_list[1:]]
|
||||
else:
|
||||
self._b_prob_all = 1.-param_to_array(varp_list[0].binary_prob)
|
||||
[np.multiply(self._b_prob_all, 1.-vp.binary_prob, self._b_prob_all) for vp in varp_list[1:]]
|
||||
|
||||
|
||||
def KL_divergence(self, variational_posterior, N=None):
|
||||
mu = variational_posterior.mean
|
||||
S = variational_posterior.variance
|
||||
gamma = variational_posterior.binary_prob
|
||||
if len(self.pi.shape)==2:
|
||||
idx = np.unique(gamma._raveled_index()/gamma.shape[-1])
|
||||
pi = self.pi[idx]
|
||||
else:
|
||||
pi = self.pi
|
||||
|
||||
ml = self._highest_parent_
|
||||
if hasattr(ml, 'models'):
|
||||
varp_list = [m.X for m in ml.models]
|
||||
[vp._update_inernal(varp_list) for vp in ml.var_priors]
|
||||
|
||||
var_mean = np.square(mu)/self.variance
|
||||
var_S = (S/self.variance - np.log(S))
|
||||
if self.group_spike:
|
||||
assert N is not None
|
||||
var_gamma = ((gamma*np.log(gamma/pi)).sum()+((1-gamma)*np.log((1-gamma)/(1-pi))).sum())/N
|
||||
else:
|
||||
var_gamma = (gamma*np.log(gamma/pi)).sum()+((1-gamma)*np.log((1-gamma)/(1-pi))).sum()
|
||||
return var_gamma +((1.-self._b_prob_all)*(np.log(self.variance)-1. +var_mean + var_S)).sum()/(2.*self.nModels)
|
||||
|
||||
def update_gradients_KL(self, variational_posterior, N=None):
|
||||
mu = variational_posterior.mean
|
||||
S = variational_posterior.variance
|
||||
gamma = variational_posterior.binary_prob
|
||||
if len(self.pi.shape)==2:
|
||||
idx = np.unique(gamma._raveled_index()/gamma.shape[-1])
|
||||
pi = self.pi[idx]
|
||||
else:
|
||||
pi = self.pi
|
||||
|
||||
if self.group_spike:
|
||||
assert N is not None
|
||||
tmp = self._b_prob_all/(1.-gamma[0])
|
||||
gamma.gradient -= np.log((1-pi)/pi*gamma/(1.-gamma))/N +tmp*((np.square(mu)+S)/self.variance-np.log(S)+np.log(self.variance)-1.)/2.
|
||||
else:
|
||||
gamma.gradient -= np.log((1-pi)/pi*gamma/(1.-gamma))+((np.square(mu)+S)/self.variance-np.log(S)+np.log(self.variance)-1.)/2.
|
||||
mu.gradient -= (1.-self._b_prob_all)*mu/(self.variance*self.nModels)
|
||||
S.gradient -= (1./self.variance - 1./S) * (1.-self._b_prob_all) /(2.*self.nModels)
|
||||
if self.learnPi:
|
||||
raise 'Not Supported!'
|
||||
|
||||
|
|
|
|||
|
|
@ -19,5 +19,8 @@ def initialize_latent(init, input_dim, Y):
|
|||
|
||||
Xr -= Xr.mean(0)
|
||||
Xr /= Xr.std(0)
|
||||
|
||||
fracs = np.ones((input_dim,))
|
||||
fracs[:var.shape[0]] = var/var.max()
|
||||
|
||||
return Xr, var/var.max()
|
||||
return Xr, fracs
|
||||
|
|
|
|||
|
|
@ -1,35 +0,0 @@
|
|||
"""
|
||||
The tools for mpi
|
||||
"""
|
||||
|
||||
try:
|
||||
import numpy as np
|
||||
from mpi4py import MPI
|
||||
numpy_to_MPI_typemap = {
|
||||
np.dtype(np.float64) : MPI.DOUBLE,
|
||||
np.dtype(np.float32) : MPI.FLOAT,
|
||||
np.dtype(np.int) : MPI.INT,
|
||||
np.dtype(np.int8) : MPI.CHAR,
|
||||
np.dtype(np.uint8) : MPI.UNSIGNED_CHAR,
|
||||
np.dtype(np.int32) : MPI.INT,
|
||||
np.dtype(np.uint32) : MPI.UNSIGNED_INT,
|
||||
}
|
||||
except:
|
||||
pass
|
||||
|
||||
def divide_data(datanum, comm):
|
||||
|
||||
residue = (datanum)%comm.size
|
||||
datanum_list = np.empty((comm.size),dtype=np.int32)
|
||||
for i in xrange(comm.size):
|
||||
if i<residue:
|
||||
datanum_list[i] = int(datanum/comm.size)+1
|
||||
else:
|
||||
datanum_list[i] = int(datanum/comm.size)
|
||||
if comm.rank<residue:
|
||||
size = datanum/comm.size+1
|
||||
offset = size*comm.rank
|
||||
else:
|
||||
size = datanum/comm.size
|
||||
offset = size*comm.rank+residue
|
||||
return offset, offset+size, datanum_list
|
||||
|
|
@ -1,12 +1,38 @@
|
|||
"""
|
||||
The module of tools for parallelization (MPI)
|
||||
"""
|
||||
|
||||
try:
|
||||
import numpy as np
|
||||
from mpi4py import MPI
|
||||
numpy_to_MPI_typemap = {
|
||||
np.dtype(np.float64) : MPI.DOUBLE,
|
||||
np.dtype(np.float32) : MPI.FLOAT,
|
||||
np.dtype(np.int) : MPI.INT,
|
||||
np.dtype(np.int8) : MPI.CHAR,
|
||||
np.dtype(np.uint8) : MPI.UNSIGNED_CHAR,
|
||||
np.dtype(np.int32) : MPI.INT,
|
||||
np.dtype(np.uint32) : MPI.UNSIGNED_INT,
|
||||
}
|
||||
except:
|
||||
pass
|
||||
|
||||
def divide_data(datanum, comm):
|
||||
|
||||
residue = (datanum)%comm.size
|
||||
datanum_list = np.empty((comm.size),dtype=np.int32)
|
||||
for i in xrange(comm.size):
|
||||
if i<residue:
|
||||
datanum_list[i] = int(datanum/comm.size)+1
|
||||
else:
|
||||
datanum_list[i] = int(datanum/comm.size)
|
||||
if comm.rank<residue:
|
||||
size = datanum/comm.size+1
|
||||
offset = size*comm.rank
|
||||
else:
|
||||
size = datanum/comm.size
|
||||
offset = size*comm.rank+residue
|
||||
return offset, offset+size, datanum_list
|
||||
|
||||
def get_id_within_node(comm=MPI.COMM_WORLD):
|
||||
rank = comm.rank
|
||||
nodename = MPI.Get_processor_name()
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue