diff --git a/GPy/core/parameterization/variational.py b/GPy/core/parameterization/variational.py index ce39e2c9..ac1dfc63 100644 --- a/GPy/core/parameterization/variational.py +++ b/GPy/core/parameterization/variational.py @@ -40,6 +40,7 @@ class SpikeAndSlabPrior(VariationalPrior): self.pi = Param('pi', pi, Logistic(1e-10,1.-1e-10)) self.variance = Param('variance',variance) self.add_parameters(self.pi) + self.group_spike_prob = False def KL_divergence(self, variational_posterior): mu = variational_posterior.mean @@ -55,7 +56,12 @@ class SpikeAndSlabPrior(VariationalPrior): S = variational_posterior.variance gamma = variational_posterior.binary_prob - gamma.gradient -= np.log((1-self.pi)/self.pi*gamma/(1.-gamma))+(np.square(mu)+S-np.log(S)-1.)/2. + gamma_grad = np.log((1-self.pi)/self.pi*gamma/(1.-gamma))+(np.square(mu)+S-np.log(S)-1.)/2. + if self.group_spike_prob: + # Tie the spike probabilities across data points: every row receives the average gradient. + gamma.gradient -= gamma_grad.mean(axis=0) + else: + gamma.gradient -= gamma_grad mu.gradient -= gamma*mu S.gradient -= (1. - (1. / (S))) * gamma /2. self.pi.gradient = (gamma/self.pi - (1.-gamma)/(1.-self.pi)).sum(axis=0) diff --git a/GPy/inference/latent_function_inference/var_dtc_gpu.py b/GPy/inference/latent_function_inference/var_dtc_gpu.py index 669d8b97..ba7ec602 100644 --- a/GPy/inference/latent_function_inference/var_dtc_gpu.py +++ b/GPy/inference/latent_function_inference/var_dtc_gpu.py @@ -17,7 +17,7 @@ try: from pycuda.reduction import ReductionKernel from ...util.linalg_gpu import logDiagSum except: - print 'Error in importing GPU modules!' + pass # pycuda is optional; VarDTC_GPU simply becomes unavailable without it class VarDTC_GPU(object): """ diff --git a/GPy/inference/latent_function_inference/var_dtc_parallel.py b/GPy/inference/latent_function_inference/var_dtc_parallel.py index bb69b88d..4b29b16a 100644 --- a/GPy/inference/latent_function_inference/var_dtc_parallel.py +++ b/GPy/inference/latent_function_inference/var_dtc_parallel.py @@ -279,4 +279,54 @@ class VarDTC_minibatch(object): 'dL_dthetaL':dL_dthetaL} return isEnd, (n_start,n_end), grad_dict + + +def update_gradients(model): + model._log_marginal_likelihood, dL_dKmm, model.posterior = model.inference_method.inference_likelihood(model.kern, model.X, model.Z, model.likelihood, model.Y) + het_noise = model.likelihood.variance.size > 1 + + if het_noise: + dL_dthetaL = np.empty((model.Y.shape[0],)) + else: + dL_dthetaL = 0 + + # gradients w.r.t. kernel + model.kern.update_gradients_full(dL_dKmm, model.Z, None) + kern_grad = model.kern.gradient.copy() + + # gradients w.r.t. Z + model.Z.gradient[:,model.kern.active_dims] = model.kern.gradients_X(dL_dKmm, model.Z) + + isEnd = False + while not isEnd: + isEnd, n_range, grad_dict = model.inference_method.inference_minibatch(model.kern, model.X, model.Z, model.likelihood, model.Y) + if isinstance(model.X, VariationalPosterior): + + # gradients w.r.t. kernel + model.kern.update_gradients_expectations(variational_posterior=model.X[n_range[0]:n_range[1]], Z=model.Z, dL_dpsi0=grad_dict['dL_dpsi0'], dL_dpsi1=grad_dict['dL_dpsi1'], dL_dpsi2=grad_dict['dL_dpsi2']) + kern_grad += model.kern.gradient + + # gradients w.r.t. Z + model.Z.gradient[:,model.kern.active_dims] += model.kern.gradients_Z_expectations( + grad_dict['dL_dpsi1'], grad_dict['dL_dpsi2'], Z=model.Z, variational_posterior=model.X[n_range[0]:n_range[1]]) + + # gradients w.r.t. posterior parameters of X + X_grad = model.kern.gradients_qX_expectations(variational_posterior=model.X[n_range[0]:n_range[1]], Z=model.Z, dL_dpsi0=grad_dict['dL_dpsi0'], dL_dpsi1=grad_dict['dL_dpsi1'], dL_dpsi2=grad_dict['dL_dpsi2']) + model.set_X_gradients(model.X[n_range[0]:n_range[1]], X_grad) + + if het_noise: + dL_dthetaL[n_range[0]:n_range[1]] = grad_dict['dL_dthetaL'] + else: + dL_dthetaL += grad_dict['dL_dthetaL'] + + # set the gradients w.r.t. kernel + model.kern.gradient = kern_grad + + # subtract the KL divergence from the log marginal likelihood + model._log_marginal_likelihood -= model.variational_prior.KL_divergence(model.X) + # gradients of the KL divergence + model.variational_prior.update_gradients_KL(model.X) + + # gradients w.r.t. the likelihood parameters (noise variance) + model.likelihood.update_gradients(dL_dthetaL)
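
Note on the accumulation pattern in `update_gradients` above: the per-batch noise gradients are combined differently depending on whether the likelihood is heteroscedastic. A minimal standalone sketch of that pattern (names `accumulate_thetaL` and `batches` are hypothetical, not part of the patch):

```python
import numpy as np

def accumulate_thetaL(batches, N, het_noise):
    # Mirrors the dL_dthetaL bookkeeping in update_gradients above.
    dL_dthetaL = np.empty(N) if het_noise else 0.
    for (start, end), grad in batches:
        if het_noise:
            dL_dthetaL[start:end] = grad  # per-point noise: each minibatch fills its own slice
        else:
            dL_dthetaL += grad            # shared noise: scalar contributions simply sum
    return dL_dthetaL

# e.g. two batches covering 4 points with heteroscedastic noise:
print(accumulate_thetaL([((0, 2), np.array([.1, .2])), ((2, 4), np.array([.3, .4]))], 4, True))
```
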
diff --git a/GPy/kern/_src/rbf.py b/GPy/kern/_src/rbf.py index c2877d06..3ffe1f5b 100644 --- a/GPy/kern/_src/rbf.py +++ b/GPy/kern/_src/rbf.py @@ -22,6 +22,7 @@ class RBF(Stationary): def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='rbf'): super(RBF, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name) self.weave_options = {} + self.group_spike_prob = False def K_of_r(self, r): return self.variance * np.exp(-0.5 * r**2) @@ -158,6 +159,10 @@ class RBF(Stationary): grad_mu += (dL_dpsi2[:, :, :, None] * _dpsi2_dmu).reshape(ndata,-1,self.input_dim).sum(axis=1) grad_S += (dL_dpsi2[:, :, :, None] * _dpsi2_dS).reshape(ndata,-1,self.input_dim).sum(axis=1) grad_gamma += (dL_dpsi2[:,:,:, None] * _dpsi2_dgamma).reshape(ndata,-1,self.input_dim).sum(axis=1) + + if self.group_spike_prob: + # Tie gamma across data points: replace each row of the gradient with the column mean. + grad_gamma[:] = grad_gamma.mean(axis=0) return grad_mu, grad_S, grad_gamma
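
The in-place assignment `grad_gamma[:] = grad_gamma.mean(axis=0)` relies on numpy broadcasting to tie the spike-probability gradients across all data points, matching the averaging done in `SpikeAndSlabPrior.update_gradients_KL`. A tiny self-contained demo of the mechanism:

```python
import numpy as np

g = np.arange(6, dtype=float).reshape(3, 2)  # per-point gradients, shape (N, Q)
g[:] = g.mean(axis=0)                        # broadcasts the column means over all N rows
assert np.allclose(g, [[2., 3.], [2., 3.], [2., 3.]])
```
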
diff --git a/GPy/models/bayesian_gplvm.py b/GPy/models/bayesian_gplvm.py index 95230f54..974d3d61 100644 --- a/GPy/models/bayesian_gplvm.py +++ b/GPy/models/bayesian_gplvm.py @@ -9,6 +9,8 @@ from ..likelihoods import Gaussian from ..inference.optimization import SCG from ..util import linalg from ..core.parameterization.variational import NormalPosterior, NormalPrior,VariationalPosterior +from ..inference.latent_function_inference.var_dtc_parallel import update_gradients +from ..inference.latent_function_inference.var_dtc_gpu import VarDTC_GPU class BayesianGPLVM(SparseGP): """ @@ -67,8 +69,9 @@ class BayesianGPLVM(SparseGP): X.mean.gradient, X.variance.gradient = X_grad def parameters_changed(self): - update_gradients(self) - return + if isinstance(self.inference_method, VarDTC_GPU): + update_gradients(self) + return super(BayesianGPLVM, self).parameters_changed() self._log_marginal_likelihood -= self.variational_prior.KL_divergence(self.X) @@ -158,57 +161,7 @@ class BayesianGPLVM(SparseGP): from ..plotting.matplot_dep import dim_reduction_plots return dim_reduction_plots.plot_steepest_gradient_map(self,*args,**kwargs) - -def update_gradients(model): - model._log_marginal_likelihood, dL_dKmm, model.posterior = model.inference_method.inference_likelihood(model.kern, model.X, model.Z, model.likelihood, model.Y) - het_noise = model.likelihood.variance.size > 1 - - if het_noise: - dL_dthetaL = np.empty((model.Y.shape[0],)) - else: - dL_dthetaL = 0 - - #gradients w.r.t. kernel - model.kern.update_gradients_full(dL_dKmm, model.Z, None) - kern_grad = model.kern.gradient.copy() - - #gradients w.r.t. Z - model.Z.gradient[:,model.kern.active_dims] = model.kern.gradients_X(dL_dKmm, model.Z) - - isEnd = False - while not isEnd: - isEnd, n_range, grad_dict = model.inference_method.inference_minibatch(model.kern, model.X, model.Z, model.likelihood, model.Y) - if isinstance(model.X, VariationalPosterior): - - #gradients w.r.t. kernel - model.kern.update_gradients_expectations(variational_posterior=model.X[n_range[0]:n_range[1]], Z=model.Z, dL_dpsi0=grad_dict['dL_dpsi0'], dL_dpsi1=grad_dict['dL_dpsi1'], dL_dpsi2=grad_dict['dL_dpsi2']) - kern_grad += model.kern.gradient - - #gradients w.r.t. Z - model.Z.gradient[:,model.kern.active_dims] += model.kern.gradients_Z_expectations( - grad_dict['dL_dpsi1'], grad_dict['dL_dpsi2'], Z=model.Z, variational_posterior=model.X[n_range[0]:n_range[1]]) - - #gradients w.r.t. posterior parameters of X - X_grad = model.kern.gradients_qX_expectations(variational_posterior=model.X[n_range[0]:n_range[1]], Z=model.Z, dL_dpsi0=grad_dict['dL_dpsi0'], dL_dpsi1=grad_dict['dL_dpsi1'], dL_dpsi2=grad_dict['dL_dpsi2']) - model.set_X_gradients(model.X[n_range[0]:n_range[1]], X_grad) - - if het_noise: - dL_dthetaL[n_range[0]:n_range[1]] = grad_dict['dL_dthetaL'] - else: - dL_dthetaL += grad_dict['dL_dthetaL'] - - # Set the gradients w.r.t. kernel - model.kern.gradient = kern_grad - - # Update Log-likelihood - model._log_marginal_likelihood -= model.variational_prior.KL_divergence(model.X) - # update for the KL divergence - model.variational_prior.update_gradients_KL(model.X) - - # dL_dthetaL - model.likelihood.update_gradients(dL_dthetaL) def latent_cost_and_grad(mu_S, kern, Z, dL_dpsi0, dL_dpsi1, dL_dpsi2): """ diff --git a/GPy/models/ss_gplvm.py b/GPy/models/ss_gplvm.py index 1c2ecf4c..e32745c7 100644 --- a/GPy/models/ss_gplvm.py +++ b/GPy/models/ss_gplvm.py @@ -25,7 +25,7 @@ class SSGPLVM(SparseGP): """ def __init__(self, Y, input_dim, X=None, X_variance=None, init='PCA', num_inducing=10, - Z=None, kernel=None, inference_method=None, likelihood=None, name='Spike-and-Slab GPLVM', **kwargs): + Z=None, kernel=None, inference_method=None, likelihood=None, name='Spike-and-Slab GPLVM', group_spike=False, **kwargs): if X == None: # The mean of variational approximation (mu) from ..util.initialization import initialize_latent @@ -38,6 +38,9 @@ class SSGPLVM(SparseGP): gamma = np.empty_like(X) # The posterior probabilities of the binary variable in the variational approximation gamma[:] = 0.5 + 0.01 * np.random.randn(X.shape[0], input_dim) + if group_spike: + gamma[:] = gamma.mean(axis=0) + if Z is None: Z = np.random.permutation(X.copy())[:num_inducing] assert Z.shape[1] == X.shape[1] @@ -47,11 +50,16 @@ class SSGPLVM(SparseGP): if kernel is None: kernel = kern.SSRBF(input_dim) pi = np.empty((input_dim)) pi[:] = 0.5 self.variational_prior = SpikeAndSlabPrior(pi=pi) # the prior probability of the latent binary variable b X = SpikeAndSlabPosterior(X, X_variance, gamma) + + if group_spike: + kernel.group_spike_prob = True + self.variational_prior.group_spike_prob = True + SparseGP.__init__(self, X, Y, Z, kernel, likelihood, inference_method, name, **kwargs) self.add_parameter(self.X, index=0)
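
How the new constructor flag would be exercised, sketched under the assumption that `GPy.models.SSGPLVM` is exposed as on this branch (data and shapes are illustrative only):

```python
import numpy as np
import GPy

Y = np.random.randn(40, 8)  # 40 data points, 8 output dimensions
# group_spike=True initializes gamma to one shared row, and both the kernel and
# the SpikeAndSlabPrior then average its gradients over data points, so each
# latent dimension is switched on or off for the whole group at once.
m = GPy.models.SSGPLVM(Y, input_dim=3, num_inducing=10, group_spike=True)
m.optimize(messages=True)
```
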
diff --git a/GPy/plotting/matplot_dep/variational_plots.py b/GPy/plotting/matplot_dep/variational_plots.py index cf00d8a2..27cb4051 100644 --- a/GPy/plotting/matplot_dep/variational_plots.py +++ b/GPy/plotting/matplot_dep/variational_plots.py @@ -45,7 +45,7 @@ def plot(parameterized, fignum=None, ax=None, colors=None): fig.tight_layout(h_pad=.01) # , rect=(0, 0, 1, .95)) return fig -def plot_SpikeSlab(parameterized, fignum=None, ax=None, colors=None): +def plot_SpikeSlab(parameterized, fignum=None, ax=None, colors=None, side_by_side=True): """ Plot latent space X in 1D: @@ -58,7 +58,8 @@ """ if ax is None: - fig = pb.figure(num=fignum, figsize=(8, min(12, (2 * parameterized.mean.shape[1])))) + width = 16 if side_by_side else 8 + fig = pb.figure(num=fignum, figsize=(width, min(12, (2 * parameterized.mean.shape[1])))) if colors is None: colors = pb.gca()._get_lines.color_cycle pb.clf() @@ -68,8 +69,16 @@ means, variances, gamma = param_to_array(parameterized.mean, parameterized.variance, parameterized.binary_prob) x = np.arange(means.shape[0]) for i in range(means.shape[1]): + # side_by_side: Q rows x 2 columns; stacked: 2Q rows x 1 column + if side_by_side: + sub1 = (means.shape[1],2,2*i+1) + sub2 = (means.shape[1],2,2*i+2) + else: + sub1 = (means.shape[1]*2,1,2*i+1) + sub2 = (means.shape[1]*2,1,2*i+2) + # mean and variance plot - a = fig.add_subplot(means.shape[1]*2, 1, 2*i + 1) + a = fig.add_subplot(*sub1) a.plot(means, c='k', alpha=.3) plots.extend(a.plot(x, means.T[i], c=colors.next(), label=r"$\mathbf{{X_{{{}}}}}$".format(i))) a.fill_between(x, @@ -82,7 +91,7 @@ if i < means.shape[1] - 1: a.set_xticklabels('') # binary prob plot - a = fig.add_subplot(means.shape[1]*2, 1, 2*i + 2) + a = fig.add_subplot(*sub2) a.bar(x,gamma[:,i],bottom=0.,linewidth=0,align='center') a.set_xlim(x.min(), x.max()) a.set_ylim([0.,1.])
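
Example call for the new plotting option (a sketch; `m` is the spike-and-slab model from the earlier example, whose `m.X` is a SpikeAndSlabPosterior):

```python
from GPy.plotting.matplot_dep.variational_plots import plot_SpikeSlab

fig = plot_SpikeSlab(m.X)                      # new default: mean/variance and gamma panels in two columns
fig = plot_SpikeSlab(m.X, side_by_side=False)  # previous stacked single-column layout
```
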