mucho changes to linear.py

This commit is contained in:
James Hensman 2014-02-26 08:24:03 +00:00
parent e9957ed896
commit ec6e85b1c1
4 changed files with 32 additions and 32 deletions

View file

@ -70,7 +70,7 @@ class SparseGP(GP):
#gradients wrt Z #gradients wrt Z
self.Z.gradient = self.kern.gradients_X(dL_dKmm, self.Z) self.Z.gradient = self.kern.gradients_X(dL_dKmm, self.Z)
self.Z.gradient += self.kern.gradients_Z_expectations( self.Z.gradient += self.kern.gradients_Z_expectations(
self.grad_dict['dL_dpsi1'], self.grad_dict['dL_dpis2'], Z=self.Z, variational_posterior=self.X) self.grad_dict['dL_dpsi1'], self.grad_dict['dL_dpsi2'], Z=self.Z, variational_posterior=self.X)
else: else:
#gradients wrt kernel #gradients wrt kernel
target = np.zeros(self.kern.size) target = np.zeros(self.kern.size)

View file

@ -274,6 +274,7 @@ def bgplvm_simulation(optimize=True, verbose=1,
_, _, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim) _, _, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim)
Y = Ylist[0] Y = Ylist[0]
k = kern.Linear(Q, ARD=True)# + kern.white(Q, _np.exp(-2)) # + kern.bias(Q) k = kern.Linear(Q, ARD=True)# + kern.white(Q, _np.exp(-2)) # + kern.bias(Q)
#k = kern.RBF(Q, ARD=True, lengthscale=10.)
m = BayesianGPLVM(Y, Q, init="PCA", num_inducing=num_inducing, kernel=k) m = BayesianGPLVM(Y, Q, init="PCA", num_inducing=num_inducing, kernel=k)
if optimize: if optimize:
@ -281,7 +282,7 @@ def bgplvm_simulation(optimize=True, verbose=1,
m.optimize('bfgs', messages=verbose, max_iters=max_iters, m.optimize('bfgs', messages=verbose, max_iters=max_iters,
gtol=.05) gtol=.05)
if plot: if plot:
m.q.plot("BGPLVM Latent Space 1D") m.X.plot("BGPLVM Latent Space 1D")
m.kern.plot_ARD('BGPLVM Simulation ARD Parameters') m.kern.plot_ARD('BGPLVM Simulation ARD Parameters')
return m return m
@ -302,7 +303,7 @@ def bgplvm_simulation_missing_data(optimize=True, verbose=1,
m = BayesianGPLVM(Y.copy(), Q, init="random", num_inducing=num_inducing, kernel=k) m = BayesianGPLVM(Y.copy(), Q, init="random", num_inducing=num_inducing, kernel=k)
m.inference_method = VarDTCMissingData() m.inference_method = VarDTCMissingData()
m.Y[inan] = _np.nan m.Y[inan] = _np.nan
m.q.variance *= .1 m.X.variance *= .1
m.parameters_changed() m.parameters_changed()
m.Yreal = Y m.Yreal = Y
@ -311,7 +312,7 @@ def bgplvm_simulation_missing_data(optimize=True, verbose=1,
m.optimize('bfgs', messages=verbose, max_iters=max_iters, m.optimize('bfgs', messages=verbose, max_iters=max_iters,
gtol=.05) gtol=.05)
if plot: if plot:
m.q.plot("BGPLVM Latent Space 1D") m.X.plot("BGPLVM Latent Space 1D")
m.kern.plot_ARD('BGPLVM Simulation ARD Parameters') m.kern.plot_ARD('BGPLVM Simulation ARD Parameters')
return m return m

View file

@ -22,22 +22,25 @@ class Linear(Kern):
:param input_dim: the number of input dimensions :param input_dim: the number of input dimensions
:type input_dim: int :type input_dim: int
:param variances: the vector of variances :math:`\sigma^2_i` :param variances: the vector of variances :math:`\sigma^2_i`
:type variances: array or list of the appropriate size (or float if there is only one variance parameter) :type variances: array or list of the appropriate size (or float if there
:param ARD: Auto Relevance Determination. If equal to "False", the kernel has only one variance parameter \sigma^2, otherwise there is one variance parameter per dimension. is only one variance parameter)
:param ARD: Auto Relevance Determination. If False, the kernel has only one
variance parameter \sigma^2, otherwise there is one variance
parameter per dimension.
:type ARD: Boolean :type ARD: Boolean
:rtype: kernel object :rtype: kernel object
""" """
def __init__(self, input_dim, variances=None, ARD=False, name='linear'): def __init__(self, input_dim, variances=None, ARD=False, name='linear'):
super(Linear, self).__init__(input_dim, name) super(Linear, self).__init__(input_dim, name)
self.ARD = ARD self.ARD = ARD
if ARD == False: if not ARD:
if variances is not None: if variances is not None:
variances = np.asarray(variances) variances = np.asarray(variances)
assert variances.size == 1, "Only one variance needed for non-ARD kernel" assert variances.size == 1, "Only one variance needed for non-ARD kernel"
else: else:
variances = np.ones(1) variances = np.ones(1)
self._Xcache, self._X2cache = np.empty(shape=(2,))
else: else:
if variances is not None: if variances is not None:
variances = np.asarray(variances) variances = np.asarray(variances)
@ -103,7 +106,6 @@ class Linear(Kern):
#---------------------------------------# #---------------------------------------#
# PSI statistics # # PSI statistics #
# variational #
#---------------------------------------# #---------------------------------------#
def psi0(self, Z, variational_posterior): def psi0(self, Z, variational_posterior):
@ -118,18 +120,16 @@ class Linear(Kern):
return np.dot(ZAinner, ZA.T) return np.dot(ZAinner, ZA.T)
def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
mu, S = variational_posterior.mean, variational_posterior.variance #psi1
self.update_gradients_full(dL_dpsi1, variational_posterior.mean, Z)
# psi0: # psi0:
tmp = dL_dpsi0[:, None] * self._mu2S(variational_posterior) tmp = dL_dpsi0[:, None] * self._mu2S(variational_posterior)
if self.ARD: grad = tmp.sum(0) if self.ARD: self.variances.gradient += tmp.sum(0)
else: grad = np.atleast_1d(tmp.sum()) else: self.variances.gradient += tmp.sum()
#psi1
self.update_gradients_full(dL_dpsi1, mu, Z)
grad += self.variances.gradient
#psi2 #psi2
tmp = dL_dpsi2[:, :, :, None] * (self._ZAinner(variational_posterior, Z)[:, :, None, :] * (2. * Z)[None, None, :, :]) tmp = dL_dpsi2[:, :, :, None] * (self._ZAinner(variational_posterior, Z)[:, :, None, :] * (2. * Z)[None, None, :, :])
if self.ARD: grad += tmp.sum(0).sum(0).sum(0) if self.ARD: self.variances.gradient += tmp.sum(0).sum(0).sum(0)
else: grad += tmp.sum() else: self.variances.gradient += tmp.sum()
def gradients_Z_expectations(self, dL_dpsi1, dL_dpsi2, Z, variational_posterior): def gradients_Z_expectations(self, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
#psi1 #psi1
@ -155,7 +155,7 @@ class Linear(Kern):
#--------------------------------------------------# #--------------------------------------------------#
def _weave_dpsi2_dmuS(self, dL_dpsi2, Z, pv, target_mu, target_S): def _weave_dpsi2_dmuS(self, dL_dpsi2, Z, vp, target_mu, target_S):
# Think N,num_inducing,num_inducing,input_dim # Think N,num_inducing,num_inducing,input_dim
ZA = Z * self.variances ZA = Z * self.variances
AZZA = ZA.T[:, None, :, None] * ZA[None, :, None, :] AZZA = ZA.T[:, None, :, None] * ZA[None, :, None, :]
@ -198,16 +198,15 @@ class Linear(Kern):
weave_options = {'headers' : ['<omp.h>'], weave_options = {'headers' : ['<omp.h>'],
'extra_compile_args': ['-fopenmp -O3'], #-march=native'], 'extra_compile_args': ['-fopenmp -O3'], #-march=native'],
'extra_link_args' : ['-lgomp']} 'extra_link_args' : ['-lgomp']}
mu = vp.mean
mu = pv.mean
N,num_inducing,input_dim,mu = mu.shape[0],Z.shape[0],mu.shape[1],param_to_array(mu) N,num_inducing,input_dim,mu = mu.shape[0],Z.shape[0],mu.shape[1],param_to_array(mu)
weave.inline(code, support_code=support_code, libraries=['gomp'], weave.inline(code, support_code=support_code, libraries=['gomp'],
arg_names=['N','num_inducing','input_dim','mu','AZZA','AZZA_2','target_mu','target_S','dL_dpsi2'], arg_names=['N','num_inducing','input_dim','mu','AZZA','AZZA_2','target_mu','target_S','dL_dpsi2'],
type_converters=weave.converters.blitz,**weave_options) type_converters=weave.converters.blitz,**weave_options)
def _weave_dpsi2_dZ(self, dL_dpsi2, Z, pv, target): def _weave_dpsi2_dZ(self, dL_dpsi2, Z, vp, target):
AZA = self.variances*self._ZAinner(pv, Z) AZA = self.variances*self._ZAinner(vp, Z)
code=""" code="""
int n,m,mm,q; int n,m,mm,q;
#pragma omp parallel for private(n,mm,q) #pragma omp parallel for private(n,mm,q)
@ -229,23 +228,23 @@ class Linear(Kern):
'extra_compile_args': ['-fopenmp -O3'], #-march=native'], 'extra_compile_args': ['-fopenmp -O3'], #-march=native'],
'extra_link_args' : ['-lgomp']} 'extra_link_args' : ['-lgomp']}
N,num_inducing,input_dim = pv.mean.shape[0],Z.shape[0],pv.mean.shape[1] N,num_inducing,input_dim = vp.mean.shape[0],Z.shape[0],vp.mean.shape[1]
mu = param_to_array(pv.mean) mu = param_to_array(vp.mean)
weave.inline(code, support_code=support_code, libraries=['gomp'], weave.inline(code, support_code=support_code, libraries=['gomp'],
arg_names=['N','num_inducing','input_dim','AZA','target','dL_dpsi2'], arg_names=['N','num_inducing','input_dim','AZA','target','dL_dpsi2'],
type_converters=weave.converters.blitz,**weave_options) type_converters=weave.converters.blitz,**weave_options)
def _mu2S(self, pv): def _mu2S(self, vp):
return np.square(pv.mean) + pv.variance return np.square(vp.mean) + vp.variance
def _ZAinner(self, pv, Z): def _ZAinner(self, vp, Z):
ZA = Z*self.variances ZA = Z*self.variances
inner = (pv.mean[:, None, :] * pv.mean[:, :, None]) inner = (vp.mean[:, None, :] * vp.mean[:, :, None])
diag_indices = np.diag_indices(pv.mean.shape[1], 2) diag_indices = np.diag_indices(vp.mean.shape[1], 2)
inner[:, diag_indices[0], diag_indices[1]] += pv.variance inner[:, diag_indices[0], diag_indices[1]] += vp.variance
return np.dot(ZA, inner).swapaxes(0, 1) # NOTE: self.ZAinner \in [num_inducing x N x input_dim]! return np.dot(ZA, inner).swapaxes(0, 1) # NOTE: self.ZAinner \in [num_inducing x num_data x input_dim]!
def input_sensitivity(self): def input_sensitivity(self):
if self.ARD: return self.variances if self.ARD: return self.variances

View file

@ -66,7 +66,7 @@ class BayesianGPLVM(SparseGP):
super(BayesianGPLVM, self).parameters_changed() super(BayesianGPLVM, self).parameters_changed()
self._log_marginal_likelihood -= self.variational_prior.KL_divergence(self.X) self._log_marginal_likelihood -= self.variational_prior.KL_divergence(self.X)
self.X.mean.gradient, self.X.variance.gradient = self.kern.gradients_q_variational(variational_posterior=self.X, Z=self.Z, **self.grad_dict) self.X.mean.gradient, self.X.variance.gradient = self.kern.gradients_qX_expectations(variational_posterior=self.X, Z=self.Z, **self.grad_dict)
# update for the KL divergence # update for the KL divergence
self.variational_prior.update_gradients_KL(self.X) self.variational_prior.update_gradients_KL(self.X)