mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-06-11 15:15:15 +02:00
[GPU] GPU kernel
This commit is contained in:
parent
b5b17b9715
commit
53627ee282
7 changed files with 90 additions and 60 deletions
|
|
@ -40,6 +40,7 @@ class SpikeAndSlabPrior(VariationalPrior):
|
|||
self.pi = Param('pi', pi, Logistic(1e-10,1.-1e-10))
|
||||
self.variance = Param('variance',variance)
|
||||
self.add_parameters(self.pi)
|
||||
self.group_spike_prob = False
|
||||
|
||||
def KL_divergence(self, variational_posterior):
|
||||
mu = variational_posterior.mean
|
||||
|
|
@ -55,7 +56,11 @@ class SpikeAndSlabPrior(VariationalPrior):
|
|||
S = variational_posterior.variance
|
||||
gamma = variational_posterior.binary_prob
|
||||
|
||||
gamma.gradient -= np.log((1-self.pi)/self.pi*gamma/(1.-gamma))+(np.square(mu)+S-np.log(S)-1.)/2.
|
||||
if self.group_spike_prob:
|
||||
gamma_grad = np.log((1-self.pi)/self.pi*gamma/(1.-gamma))+(np.square(mu)+S-np.log(S)-1.)/2.
|
||||
gamma.gradient -= gamma_grad.mean(axis=0)
|
||||
else:
|
||||
gamma.gradient -= np.log((1-self.pi)/self.pi*gamma/(1.-gamma))+(np.square(mu)+S-np.log(S)-1.)/2.
|
||||
mu.gradient -= gamma*mu
|
||||
S.gradient -= (1. - (1. / (S))) * gamma /2.
|
||||
self.pi.gradient = (gamma/self.pi - (1.-gamma)/(1.-self.pi)).sum(axis=0)
|
||||
|
|
|
|||
|
|
@ -17,7 +17,7 @@ try:
|
|||
from pycuda.reduction import ReductionKernel
|
||||
from ...util.linalg_gpu import logDiagSum
|
||||
except:
|
||||
print 'Error in importing GPU modules!'
|
||||
pass
|
||||
|
||||
class VarDTC_GPU(object):
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -279,4 +279,54 @@ class VarDTC_minibatch(object):
|
|||
'dL_dthetaL':dL_dthetaL}
|
||||
|
||||
return isEnd, (n_start,n_end), grad_dict
|
||||
|
||||
|
||||
def update_gradients(model):
|
||||
model._log_marginal_likelihood, dL_dKmm, model.posterior = model.inference_method.inference_likelihood(model.kern, model.X, model.Z, model.likelihood, model.Y)
|
||||
|
||||
het_noise = model.likelihood.variance.size > 1
|
||||
|
||||
if het_noise:
|
||||
dL_dthetaL = np.empty((model.Y.shape[0],))
|
||||
else:
|
||||
dL_dthetaL = 0
|
||||
|
||||
#gradients w.r.t. kernel
|
||||
model.kern.update_gradients_full(dL_dKmm, model.Z, None)
|
||||
kern_grad = model.kern.gradient.copy()
|
||||
|
||||
#gradients w.r.t. Z
|
||||
model.Z.gradient[:,model.kern.active_dims] = model.kern.gradients_X(dL_dKmm, model.Z)
|
||||
|
||||
isEnd = False
|
||||
while not isEnd:
|
||||
isEnd, n_range, grad_dict = model.inference_method.inference_minibatch(model.kern, model.X, model.Z, model.likelihood, model.Y)
|
||||
if isinstance(model.X, VariationalPosterior):
|
||||
|
||||
#gradients w.r.t. kernel
|
||||
model.kern.update_gradients_expectations(variational_posterior=model.X[n_range[0]:n_range[1]], Z=model.Z, dL_dpsi0=grad_dict['dL_dpsi0'], dL_dpsi1=grad_dict['dL_dpsi1'], dL_dpsi2=grad_dict['dL_dpsi2'])
|
||||
kern_grad += model.kern.gradient
|
||||
|
||||
#gradients w.r.t. Z
|
||||
model.Z.gradient[:,model.kern.active_dims] += model.kern.gradients_Z_expectations(
|
||||
grad_dict['dL_dpsi1'], grad_dict['dL_dpsi2'], Z=model.Z, variational_posterior=model.X[n_range[0]:n_range[1]])
|
||||
|
||||
#gradients w.r.t. posterior parameters of X
|
||||
X_grad = model.kern.gradients_qX_expectations(variational_posterior=model.X[n_range[0]:n_range[1]], Z=model.Z, dL_dpsi0=grad_dict['dL_dpsi0'], dL_dpsi1=grad_dict['dL_dpsi1'], dL_dpsi2=grad_dict['dL_dpsi2'])
|
||||
model.set_X_gradients(model.X[n_range[0]:n_range[1]], X_grad)
|
||||
|
||||
if het_noise:
|
||||
dL_dthetaL[n_range[0]:n_range[1]] = grad_dict['dL_dthetaL']
|
||||
else:
|
||||
dL_dthetaL += grad_dict['dL_dthetaL']
|
||||
|
||||
# Set the gradients w.r.t. kernel
|
||||
model.kern.gradient = kern_grad
|
||||
|
||||
# Update Log-likelihood
|
||||
model._log_marginal_likelihood -= model.variational_prior.KL_divergence(model.X)
|
||||
# update for the KL divergence
|
||||
model.variational_prior.update_gradients_KL(model.X)
|
||||
|
||||
# dL_dthetaL
|
||||
model.likelihood.update_gradients(dL_dthetaL)
|
||||
|
|
|
|||
|
|
@ -22,6 +22,7 @@ class RBF(Stationary):
|
|||
def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='rbf'):
|
||||
super(RBF, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name)
|
||||
self.weave_options = {}
|
||||
self.group_spike_prob = False
|
||||
|
||||
def K_of_r(self, r):
|
||||
return self.variance * np.exp(-0.5 * r**2)
|
||||
|
|
@ -158,6 +159,9 @@ class RBF(Stationary):
|
|||
grad_mu += (dL_dpsi2[:, :, :, None] * _dpsi2_dmu).reshape(ndata,-1,self.input_dim).sum(axis=1)
|
||||
grad_S += (dL_dpsi2[:, :, :, None] * _dpsi2_dS).reshape(ndata,-1,self.input_dim).sum(axis=1)
|
||||
grad_gamma += (dL_dpsi2[:,:,:, None] * _dpsi2_dgamma).reshape(ndata,-1,self.input_dim).sum(axis=1)
|
||||
|
||||
if self.group_spike_prob:
|
||||
grad_gamma[:] = grad_gamma.mean(axis=0)
|
||||
|
||||
return grad_mu, grad_S, grad_gamma
|
||||
|
||||
|
|
|
|||
|
|
@ -9,6 +9,8 @@ from ..likelihoods import Gaussian
|
|||
from ..inference.optimization import SCG
|
||||
from ..util import linalg
|
||||
from ..core.parameterization.variational import NormalPosterior, NormalPrior,VariationalPosterior
|
||||
from ..inference.latent_function_inference.var_dtc_parallel import update_gradients
|
||||
from ..inference.latent_function_inference.var_dtc_gpu import VarDTC_GPU
|
||||
|
||||
class BayesianGPLVM(SparseGP):
|
||||
"""
|
||||
|
|
@ -67,8 +69,9 @@ class BayesianGPLVM(SparseGP):
|
|||
X.mean.gradient, X.variance.gradient = X_grad
|
||||
|
||||
def parameters_changed(self):
|
||||
update_gradients(self)
|
||||
return
|
||||
if isinstance(self.inference_method, VarDTC_GPU):
|
||||
update_gradients(self)
|
||||
return
|
||||
|
||||
super(BayesianGPLVM, self).parameters_changed()
|
||||
self._log_marginal_likelihood -= self.variational_prior.KL_divergence(self.X)
|
||||
|
|
@ -158,57 +161,7 @@ class BayesianGPLVM(SparseGP):
|
|||
from ..plotting.matplot_dep import dim_reduction_plots
|
||||
|
||||
return dim_reduction_plots.plot_steepest_gradient_map(self,*args,**kwargs)
|
||||
|
||||
|
||||
def update_gradients(model):
|
||||
model._log_marginal_likelihood, dL_dKmm, model.posterior = model.inference_method.inference_likelihood(model.kern, model.X, model.Z, model.likelihood, model.Y)
|
||||
|
||||
het_noise = model.likelihood.variance.size > 1
|
||||
|
||||
if het_noise:
|
||||
dL_dthetaL = np.empty((model.Y.shape[0],))
|
||||
else:
|
||||
dL_dthetaL = 0
|
||||
|
||||
#gradients w.r.t. kernel
|
||||
model.kern.update_gradients_full(dL_dKmm, model.Z, None)
|
||||
kern_grad = model.kern.gradient.copy()
|
||||
|
||||
#gradients w.r.t. Z
|
||||
model.Z.gradient[:,model.kern.active_dims] = model.kern.gradients_X(dL_dKmm, model.Z)
|
||||
|
||||
isEnd = False
|
||||
while not isEnd:
|
||||
isEnd, n_range, grad_dict = model.inference_method.inference_minibatch(model.kern, model.X, model.Z, model.likelihood, model.Y)
|
||||
if isinstance(model.X, VariationalPosterior):
|
||||
|
||||
#gradients w.r.t. kernel
|
||||
model.kern.update_gradients_expectations(variational_posterior=model.X[n_range[0]:n_range[1]], Z=model.Z, dL_dpsi0=grad_dict['dL_dpsi0'], dL_dpsi1=grad_dict['dL_dpsi1'], dL_dpsi2=grad_dict['dL_dpsi2'])
|
||||
kern_grad += model.kern.gradient
|
||||
|
||||
#gradients w.r.t. Z
|
||||
model.Z.gradient[:,model.kern.active_dims] += model.kern.gradients_Z_expectations(
|
||||
grad_dict['dL_dpsi1'], grad_dict['dL_dpsi2'], Z=model.Z, variational_posterior=model.X[n_range[0]:n_range[1]])
|
||||
|
||||
#gradients w.r.t. posterior parameters of X
|
||||
X_grad = model.kern.gradients_qX_expectations(variational_posterior=model.X[n_range[0]:n_range[1]], Z=model.Z, dL_dpsi0=grad_dict['dL_dpsi0'], dL_dpsi1=grad_dict['dL_dpsi1'], dL_dpsi2=grad_dict['dL_dpsi2'])
|
||||
model.set_X_gradients(model.X[n_range[0]:n_range[1]], X_grad)
|
||||
|
||||
if het_noise:
|
||||
dL_dthetaL[n_range[0]:n_range[1]] = grad_dict['dL_dthetaL']
|
||||
else:
|
||||
dL_dthetaL += grad_dict['dL_dthetaL']
|
||||
|
||||
# Set the gradients w.r.t. kernel
|
||||
model.kern.gradient = kern_grad
|
||||
|
||||
# Update Log-likelihood
|
||||
model._log_marginal_likelihood -= model.variational_prior.KL_divergence(model.X)
|
||||
# update for the KL divergence
|
||||
model.variational_prior.update_gradients_KL(model.X)
|
||||
|
||||
# dL_dthetaL
|
||||
model.likelihood.update_gradients(dL_dthetaL)
|
||||
|
||||
def latent_cost_and_grad(mu_S, kern, Z, dL_dpsi0, dL_dpsi1, dL_dpsi2):
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -25,7 +25,7 @@ class SSGPLVM(SparseGP):
|
|||
|
||||
"""
|
||||
def __init__(self, Y, input_dim, X=None, X_variance=None, init='PCA', num_inducing=10,
|
||||
Z=None, kernel=None, inference_method=None, likelihood=None, name='Spike-and-Slab GPLVM', **kwargs):
|
||||
Z=None, kernel=None, inference_method=None, likelihood=None, name='Spike-and-Slab GPLVM', group_spike=False, **kwargs):
|
||||
|
||||
if X == None: # The mean of variational approximation (mu)
|
||||
from ..util.initialization import initialize_latent
|
||||
|
|
@ -38,6 +38,9 @@ class SSGPLVM(SparseGP):
|
|||
gamma = np.empty_like(X) # The posterior probabilities of the binary variable in the variational approximation
|
||||
gamma[:] = 0.5 + 0.01 * np.random.randn(X.shape[0], input_dim)
|
||||
|
||||
if group_spike:
|
||||
gamma[:] = gamma.mean(axis=0)
|
||||
|
||||
if Z is None:
|
||||
Z = np.random.permutation(X.copy())[:num_inducing]
|
||||
assert Z.shape[1] == X.shape[1]
|
||||
|
|
@ -47,11 +50,16 @@ class SSGPLVM(SparseGP):
|
|||
|
||||
if kernel is None:
|
||||
kernel = kern.SSRBF(input_dim)
|
||||
|
||||
|
||||
pi = np.empty((input_dim))
|
||||
pi[:] = 0.5
|
||||
self.variational_prior = SpikeAndSlabPrior(pi=pi) # the prior probability of the latent binary variable b
|
||||
X = SpikeAndSlabPosterior(X, X_variance, gamma)
|
||||
|
||||
if group_spike:
|
||||
kernel.group_spike_prob = True
|
||||
self.variational_prior.group_spike_prob = True
|
||||
|
||||
|
||||
SparseGP.__init__(self, X, Y, Z, kernel, likelihood, inference_method, name, **kwargs)
|
||||
self.add_parameter(self.X, index=0)
|
||||
|
|
|
|||
|
|
@ -45,7 +45,7 @@ def plot(parameterized, fignum=None, ax=None, colors=None):
|
|||
fig.tight_layout(h_pad=.01) # , rect=(0, 0, 1, .95))
|
||||
return fig
|
||||
|
||||
def plot_SpikeSlab(parameterized, fignum=None, ax=None, colors=None):
|
||||
def plot_SpikeSlab(parameterized, fignum=None, ax=None, colors=None, side_by_side=True):
|
||||
"""
|
||||
Plot latent space X in 1D:
|
||||
|
||||
|
|
@ -58,7 +58,10 @@ def plot_SpikeSlab(parameterized, fignum=None, ax=None, colors=None):
|
|||
|
||||
"""
|
||||
if ax is None:
|
||||
fig = pb.figure(num=fignum, figsize=(8, min(12, (2 * parameterized.mean.shape[1]))))
|
||||
if side_by_side:
|
||||
fig = pb.figure(num=fignum, figsize=(16, min(12, (2 * parameterized.mean.shape[1]))))
|
||||
else:
|
||||
fig = pb.figure(num=fignum, figsize=(8, min(12, (2 * parameterized.mean.shape[1]))))
|
||||
if colors is None:
|
||||
colors = pb.gca()._get_lines.color_cycle
|
||||
pb.clf()
|
||||
|
|
@ -68,8 +71,15 @@ def plot_SpikeSlab(parameterized, fignum=None, ax=None, colors=None):
|
|||
means, variances, gamma = param_to_array(parameterized.mean, parameterized.variance, parameterized.binary_prob)
|
||||
x = np.arange(means.shape[0])
|
||||
for i in range(means.shape[1]):
|
||||
if side_by_side:
|
||||
sub1 = (means.shape[1],2,2*i+1)
|
||||
sub2 = (means.shape[1],2,2*i+2)
|
||||
else:
|
||||
sub1 = (means.shape[1]*2,1,2*i+1)
|
||||
sub2 = (means.shape[1]*2,1,2*i+2)
|
||||
|
||||
# mean and variance plot
|
||||
a = fig.add_subplot(means.shape[1]*2, 1, 2*i + 1)
|
||||
a = fig.add_subplot(*sub1)
|
||||
a.plot(means, c='k', alpha=.3)
|
||||
plots.extend(a.plot(x, means.T[i], c=colors.next(), label=r"$\mathbf{{X_{{{}}}}}$".format(i)))
|
||||
a.fill_between(x,
|
||||
|
|
@ -82,7 +92,7 @@ def plot_SpikeSlab(parameterized, fignum=None, ax=None, colors=None):
|
|||
if i < means.shape[1] - 1:
|
||||
a.set_xticklabels('')
|
||||
# binary prob plot
|
||||
a = fig.add_subplot(means.shape[1]*2, 1, 2*i + 2)
|
||||
a = fig.add_subplot(*sub2)
|
||||
a.bar(x,gamma[:,i],bottom=0.,linewidth=0,align='center')
|
||||
a.set_xlim(x.min(), x.max())
|
||||
a.set_ylim([0.,1.])
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue