Merge branch 'devel' of github.com:SheffieldML/GPy into devel

This commit is contained in:
Max Zwiessele 2014-05-15 14:06:04 +01:00
commit dfdb1c24e6
12 changed files with 247 additions and 126 deletions

View file

@ -66,7 +66,7 @@ class SparseGP(GP):
#gradients wrt Z
self.Z.gradient = self.kern.gradients_X(dL_dKmm, self.Z)
self.Z.gradient += self.kern.gradients_Z_expectations(
self.grad_dict['dL_dpsi1'], self.grad_dict['dL_dpsi2'], Z=self.Z, variational_posterior=self.X)
self.grad_dict['dL_dpsi0'], self.grad_dict['dL_dpsi1'], self.grad_dict['dL_dpsi2'], Z=self.Z, variational_posterior=self.X)
else:
#gradients wrt kernel
self.kern.update_gradients_diag(self.grad_dict['dL_dKdiag'], self.X)

View file

@ -71,12 +71,13 @@ class VarDTC_minibatch(LatentFunctionInference):
#see whether we've got a different noise variance for each datum
beta = 1./np.fmax(likelihood.variance, 1e-6)
het_noise = beta.size > 1
if het_noise:
self.batchsize = 1
# VVT_factor is a matrix such that tdot(VVT_factor) = VVT...this is for efficiency!
#self.YYTfactor = beta*self.get_YYTfactor(Y)
YYT_factor = Y
trYYT = self.get_trYYT(Y)
psi2_full = np.zeros((num_inducing,num_inducing))
psi1Y_full = np.zeros((output_dim,num_inducing)) # DxM
psi0_full = 0
@ -105,19 +106,18 @@ class VarDTC_minibatch(LatentFunctionInference):
YRY_full += (beta_slice*np.square(Y_slice).sum(axis=-1)).sum()
else:
psi0_full += psi0.sum()
psi1Y_full += np.dot(Y_slice.T,psi1) # DxM
psi1Y_full += np.dot(Y_slice.T,psi1) # DxM
if uncertain_inputs:
if het_noise:
psi2_full += np.einsum('n,nmo->mo',beta_slice,psi2)
psi2_full += beta_slice*psi2
else:
psi2_full += psi2.sum(axis=0)
psi2_full += psi2
else:
if het_noise:
psi2_full += np.einsum('n,nm,no->mo',beta_slice,psi1,psi1)
psi2_full += beta_slice*np.outer(psi1,psi1)
else:
psi2_full += tdot(psi1.T)
psi2_full += np.outer(psi1,psi1)
if not het_noise:
psi0_full *= beta
@ -224,7 +224,7 @@ class VarDTC_minibatch(LatentFunctionInference):
psi2 = None
if het_noise:
beta = beta[n_start:n_end]
beta = beta[n_start] # assuming batchsize==1
betaY = beta*Y_slice
betapsi1 = np.einsum('n,nm->nm',beta,psi1)
@ -245,7 +245,7 @@ class VarDTC_minibatch(LatentFunctionInference):
dL_dpsi1 = np.dot(betaY,v.T)
if uncertain_inputs:
dL_dpsi2 = np.einsum('n,mo->nmo',beta * np.ones((n_end-n_start,)),dL_dpsi2R)
dL_dpsi2 = beta* dL_dpsi2R
else:
dL_dpsi1 += np.dot(betapsi1,dL_dpsi2R)*2.
dL_dpsi2 = None
@ -263,11 +263,11 @@ class VarDTC_minibatch(LatentFunctionInference):
dL_dthetaL = ((np.square(betaY)).sum(axis=-1) + np.square(beta)*(output_dim*psi0)-output_dim*beta)/2. - np.square(beta)*psiR- (betaY*np.dot(betapsi1,v)).sum(axis=-1)
else:
if uncertain_inputs:
psiR = np.einsum('mo,nmo->',dL_dpsi2R,psi2)
psiR = np.einsum('mo,mo->',dL_dpsi2R,psi2)
else:
psiR = np.einsum('nm,no,mo->',psi1,psi1,dL_dpsi2R)
dL_dthetaL = ((np.square(betaY)).sum() + np.square(beta)*output_dim*(psi0.sum())-num_slice*output_dim*beta)/2. - np.square(beta)*psiR- (betaY*np.dot(betapsi1,v)).sum()
dL_dthetaL = ((np.square(betaY)).sum() + beta*beta*output_dim*(psi0.sum())-num_slice*output_dim*beta)/2. - beta*beta*psiR- (betaY*np.dot(betapsi1,v)).sum()
if uncertain_inputs:
grad_dict = {'dL_dpsi0':dL_dpsi0,
@ -297,7 +297,7 @@ def update_gradients(model):
kern_grad = model.kern.gradient.copy()
#gradients w.r.t. Z
model.Z.gradient[:,model.kern.active_dims] = model.kern.gradients_X(dL_dKmm, model.Z)
model.Z.gradient = model.kern.gradients_X(dL_dKmm, model.Z)
isEnd = False
while not isEnd:
@ -310,8 +310,8 @@ def update_gradients(model):
kern_grad += model.kern.gradient
#gradients w.r.t. Z
model.Z.gradient[:,model.kern.active_dims] += model.kern.gradients_Z_expectations(
grad_dict['dL_dpsi1'], grad_dict['dL_dpsi2'], Z=model.Z, variational_posterior=X_slice)
model.Z.gradient += model.kern.gradients_Z_expectations(
dL_dpsi0=grad_dict['dL_dpsi0'], dL_dpsi1=grad_dict['dL_dpsi1'], dL_dpsi2=grad_dict['dL_dpsi2'], Z=model.Z, variational_posterior=X_slice)
#gradients w.r.t. posterior parameters of X
X_grad = model.kern.gradients_qX_expectations(variational_posterior=X_slice, Z=model.Z, dL_dpsi0=grad_dict['dL_dpsi0'], dL_dpsi1=grad_dict['dL_dpsi1'], dL_dpsi2=grad_dict['dL_dpsi2'])

View file

@ -119,7 +119,7 @@ class Add(CombinationKernel):
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z, variational_posterior) * 2.
p1.update_gradients_expectations(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
def gradients_Z_expectations(self, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
def gradients_Z_expectations(self, dL_psi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
from static import White, Bias
target = np.zeros(Z.shape)
for p1 in self.parts:
@ -170,4 +170,4 @@ class Add(CombinationKernel):
return self
def input_sensitivity(self):
return reduce(np.add, [k.input_sensitivity() for k in self.parts])
return reduce(np.add, [k.input_sensitivity() for k in self.parts])

View file

@ -103,7 +103,7 @@ class Kern(Parameterized):
"""
raise NotImplementedError
def gradients_Z_expectations(self, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
def gradients_Z_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
"""
Returns the derivative of the objective wrt Z, using the chain rule
through the expectation variables.

View file

@ -124,9 +124,9 @@ def _slice_update_gradients_expectations(f):
def _slice_gradients_Z_expectations(f):
@wraps(f)
def wrap(self, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
def wrap(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
with _Slice_wrap(self, Z, variational_posterior) as s:
ret = s.handle_return_array(f(self, dL_dpsi1, dL_dpsi2, s.X, s.X2))
ret = s.handle_return_array(f(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, s.X, s.X2))
return ret
return wrap

View file

@ -169,7 +169,7 @@ class Linear(Kern):
else:
self.variances.gradient += 2.*np.sum(dL_dpsi2 * self.psi2(Z, variational_posterior))/self.variances
def gradients_Z_expectations(self, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
def gradients_Z_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
if isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
gamma = variational_posterior.binary_prob
mu = variational_posterior.mean

View file

@ -9,12 +9,23 @@ import numpy as np
from GPy.util.caching import Cache_this
@Cache_this(limit=1)
def _Z_distances(Z):
Zhat = 0.5 * (Z[:, None, :] + Z[None, :, :]) # M,M,Q
Zdist = 0.5 * (Z[:, None, :] - Z[None, :, :]) # M,M,Q
return Zhat, Zdist
def psicomputations(variance, lengthscale, Z, mu, S, gamma):
"""
Z - MxQ
mu - NxQ
S - NxQ
gamma - NxQ
"""
# here are the "statistics" for psi0, psi1 and psi2
# Produced intermediate results:
# _psi1 NxM
psi0 = np.empty(mu.shape[0])
psi0[:] = variance
psi1 = _psi1computations(variance, lengthscale, Z, mu, S, gamma)
psi2 = _psi2computations(variance, lengthscale, Z, mu, S, gamma)
return psi0, psi1, psi2
@Cache_this(limit=1)
def _psi1computations(variance, lengthscale, Z, mu, S, gamma):
"""
Z - MxQ
@ -22,15 +33,10 @@ def _psi1computations(variance, lengthscale, Z, mu, S, gamma):
S - NxQ
gamma - NxQ
"""
# here are the "statistics" for psi1 and psi2
# here are the "statistics" for psi1
# Produced intermediate results:
# _psi1 NxM
# _dpsi1_dvariance NxM
# _dpsi1_dlengthscale NxMxQ
# _dpsi1_dZ NxMxQ
# _dpsi1_dgamma NxMxQ
# _dpsi1_dmu NxMxQ
# _dpsi1_dS NxMxQ
lengthscale2 = np.square(lengthscale)
@ -40,25 +46,15 @@ def _psi1computations(variance, lengthscale, Z, mu, S, gamma):
_psi1_dist = Z[None, :, :] - mu[:, None, :] # NxMxQ
_psi1_dist_sq = np.square(_psi1_dist) / (lengthscale2 * _psi1_denom) # NxMxQ
_psi1_common = gamma[:,None,:] / (lengthscale2*_psi1_denom*_psi1_denom_sqrt) #Nx1xQ
_psi1_exponent1 = np.log(gamma[:,None,:]) -0.5 * (_psi1_dist_sq + np.log(_psi1_denom)) # NxMxQ
_psi1_exponent2 = np.log(1.-gamma[:,None,:]) -0.5 * (np.square(Z[None,:,:])/lengthscale2) # NxMxQ
_psi1_exponent1 = np.log(gamma[:,None,:]) - (_psi1_dist_sq + np.log(_psi1_denom))/2. # NxMxQ
_psi1_exponent2 = np.log(1.-gamma[:,None,:]) - (np.square(Z[None,:,:])/lengthscale2)/2. # NxMxQ
_psi1_exponent_max = np.maximum(_psi1_exponent1,_psi1_exponent2)
_psi1_exponent = _psi1_exponent_max+np.log(np.exp(_psi1_exponent1-_psi1_exponent_max) + np.exp(_psi1_exponent2-_psi1_exponent_max)) #NxMxQ
_psi1_exp_sum = _psi1_exponent.sum(axis=-1) #NxM
_psi1_exp_dist_sq = np.exp(-0.5*_psi1_dist_sq) # NxMxQ
_psi1_exp_Z = np.exp(-0.5*np.square(Z[None,:,:])/lengthscale2) # 1xMxQ
_psi1_q = variance * np.exp(_psi1_exp_sum[:,:,None] - _psi1_exponent) # NxMxQ
_psi1 = variance * np.exp(_psi1_exp_sum) # NxM
_dpsi1_dvariance = _psi1 / variance # NxM
_dpsi1_dgamma = _psi1_q * (_psi1_exp_dist_sq/_psi1_denom_sqrt-_psi1_exp_Z) # NxMxQ
_dpsi1_dmu = _psi1_q * (_psi1_exp_dist_sq * _psi1_dist * _psi1_common) # NxMxQ
_dpsi1_dS = _psi1_q * (_psi1_exp_dist_sq * _psi1_common * 0.5 * (_psi1_dist_sq - 1.)) # NxMxQ
_dpsi1_dZ = _psi1_q * (- _psi1_common * _psi1_dist * _psi1_exp_dist_sq - (1-gamma[:,None,:])/lengthscale2*Z[None,:,:]*_psi1_exp_Z) # NxMxQ
_dpsi1_dlengthscale = 2.*lengthscale*_psi1_q * (0.5*_psi1_common*(S[:,None,:]/lengthscale2+_psi1_dist_sq)*_psi1_exp_dist_sq + 0.5*(1-gamma[:,None,:])*np.square(Z[None,:,:]/lengthscale2)*_psi1_exp_Z) # NxMxQ
return _psi1, _dpsi1_dvariance, _dpsi1_dgamma, _dpsi1_dmu, _dpsi1_dS, _dpsi1_dZ, _dpsi1_dlengthscale
return _psi1
@Cache_this(limit=1)
def _psi2computations(variance, lengthscale, Z, mu, S, gamma):
"""
Z - MxQ
@ -66,19 +62,14 @@ def _psi2computations(variance, lengthscale, Z, mu, S, gamma):
S - NxQ
gamma - NxQ
"""
# here are the "statistics" for psi1 and psi2
# here are the "statistics" for psi2
# Produced intermediate results:
# _psi2 NxMxM
# _psi2_dvariance NxMxM
# _psi2_dlengthscale NxMxMxQ
# _psi2_dZ NxMxMxQ
# _psi2_dgamma NxMxMxQ
# _psi2_dmu NxMxMxQ
# _psi2_dS NxMxMxQ
# _psi2 MxM
lengthscale2 = np.square(lengthscale)
_psi2_Zhat, _psi2_Zdist = _Z_distances(Z)
_psi2_Zhat = 0.5 * (Z[:, None, :] + Z[None, :, :]) # M,M,Q
_psi2_Zdist = 0.5 * (Z[:, None, :] - Z[None, :, :]) # M,M,Q
_psi2_Zdist_sq = np.square(_psi2_Zdist / lengthscale) # M,M,Q
_psi2_Z_sq_sum = (np.square(Z[:,None,:])+np.square(Z[None,:,:]))/lengthscale2 # MxMxQ
@ -93,15 +84,130 @@ def _psi2computations(variance, lengthscale, Z, mu, S, gamma):
_psi2_exponent_max = np.maximum(_psi2_exponent1, _psi2_exponent2)
_psi2_exponent = _psi2_exponent_max+np.log(np.exp(_psi2_exponent1-_psi2_exponent_max) + np.exp(_psi2_exponent2-_psi2_exponent_max))
_psi2_exp_sum = _psi2_exponent.sum(axis=-1) #NxM
_psi2_q = np.square(variance) * np.exp(_psi2_exp_sum[:,:,:,None]-_psi2_exponent) # NxMxMxQ
_psi2 = variance*variance * (np.exp(_psi2_exp_sum).sum(axis=0)) # MxM
return _psi2
def psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior):
ARD = (len(lengthscale)!=1)
dvar_psi1, dl_psi1, dZ_psi1, dmu_psi1, dS_psi1, dgamma_psi1 = _psi1compDer(dL_dpsi1, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
dvar_psi2, dl_psi2, dZ_psi2, dmu_psi2, dS_psi2, dgamma_psi2 = _psi2compDer(dL_dpsi2, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
dL_dvar = np.sum(dL_dpsi0) + dvar_psi1 + dvar_psi2
dL_dlengscale = dl_psi1 + dl_psi2
if not ARD:
dL_dlengscale = dL_dlengscale.sum()
dL_dgamma = dgamma_psi1 + dgamma_psi2
dL_dmu = dmu_psi1 + dmu_psi2
dL_dS = dS_psi1 + dS_psi2
dL_dZ = dZ_psi1 + dZ_psi2
return dL_dvar, dL_dlengscale, dL_dZ, dL_dmu, dL_dS, dL_dgamma
def _psi1compDer(dL_dpsi1, variance, lengthscale, Z, mu, S, gamma):
"""
dL_dpsi1 - NxM
Z - MxQ
mu - NxQ
S - NxQ
gamma - NxQ
"""
# here are the "statistics" for psi1
# Produced intermediate results: dL_dparams w.r.t. psi1
# _dL_dvariance 1
# _dL_dlengthscale Q
# _dL_dZ MxQ
# _dL_dgamma NxQ
# _dL_dmu NxQ
# _dL_dS NxQ
lengthscale2 = np.square(lengthscale)
# psi1
_psi1_denom = S / lengthscale2 + 1. # NxQ
_psi1_denom_sqrt = np.sqrt(_psi1_denom) #NxQ
_psi1_dist = Z[None, :, :] - mu[:, None, :] # NxMxQ
_psi1_dist_sq = np.square(_psi1_dist) / (lengthscale2 * _psi1_denom[:,None,:]) # NxMxQ
_psi1_common = gamma / (lengthscale2*_psi1_denom*_psi1_denom_sqrt) #NxQ
_psi1_exponent1 = np.log(gamma[:,None,:]) -0.5 * (_psi1_dist_sq + np.log(_psi1_denom[:, None,:])) # NxMxQ
_psi1_exponent2 = np.log(1.-gamma[:,None,:]) -0.5 * (np.square(Z[None,:,:])/lengthscale2) # NxMxQ
_psi1_exponent_max = np.maximum(_psi1_exponent1,_psi1_exponent2)
_psi1_exponent = _psi1_exponent_max+np.log(np.exp(_psi1_exponent1-_psi1_exponent_max) + np.exp(_psi1_exponent2-_psi1_exponent_max)) #NxMxQ
_psi1_exp_sum = _psi1_exponent.sum(axis=-1) #NxM
_psi1_exp_dist_sq = np.exp(-0.5*_psi1_dist_sq) # NxMxQ
_psi1_exp_Z = np.exp(-0.5*np.square(Z[None,:,:])/lengthscale2) # 1xMxQ
_psi1_q = variance * np.exp(_psi1_exp_sum[:,:,None] - _psi1_exponent) # NxMxQ
_psi1 = variance * np.exp(_psi1_exp_sum) # NxM
_dL_dvariance = np.einsum('nm,nm->',dL_dpsi1, _psi1)/variance # 1
_dL_dgamma = np.einsum('nm,nmq,nmq->nq',dL_dpsi1, _psi1_q, (_psi1_exp_dist_sq/_psi1_denom_sqrt[:,None,:]-_psi1_exp_Z)) # NxQ
_dL_dmu = np.einsum('nm, nmq, nmq, nmq, nq->nq',dL_dpsi1,_psi1_q,_psi1_exp_dist_sq,_psi1_dist,_psi1_common) # NxQ
_dL_dS = np.einsum('nm,nmq,nmq,nq,nmq->nq',dL_dpsi1,_psi1_q,_psi1_exp_dist_sq,_psi1_common,(_psi1_dist_sq-1.))/2. # NxQ
_dL_dZ = np.einsum('nm,nmq,nmq->mq',dL_dpsi1,_psi1_q, (- _psi1_common[:,None,:] * _psi1_dist * _psi1_exp_dist_sq - (1-gamma[:,None,:])/lengthscale2*Z[None,:,:]*_psi1_exp_Z))
_dL_dlengthscale = lengthscale* np.einsum('nm,nmq,nmq->q',dL_dpsi1,_psi1_q,(_psi1_common[:,None,:]*(S[:,None,:]/lengthscale2+_psi1_dist_sq)*_psi1_exp_dist_sq + (1-gamma[:,None,:])*np.square(Z[None,:,:]/lengthscale2)*_psi1_exp_Z))
# _dpsi1_dmu = _psi1_q * (_psi1_exp_dist_sq * _psi1_dist * _psi1_common) # NxMxQ
# _dpsi1_dS = _psi1_q * (_psi1_exp_dist_sq * _psi1_common * 0.5 * (_psi1_dist_sq - 1.)) # NxMxQ
# _dpsi1_dZ = _psi1_q * (- _psi1_common * _psi1_dist * _psi1_exp_dist_sq - (1-gamma[:,None,:])/lengthscale2*Z[None,:,:]*_psi1_exp_Z) # NxMxQ
# _dpsi1_dlengthscale = 2.*lengthscale*_psi1_q * (0.5*_psi1_common*(S[:,None,:]/lengthscale2+_psi1_dist_sq)*_psi1_exp_dist_sq + 0.5*(1-gamma[:,None,:])*np.square(Z[None,:,:]/lengthscale2)*_psi1_exp_Z) # NxMxQ
return _dL_dvariance, _dL_dlengthscale, _dL_dZ, _dL_dmu, _dL_dS, _dL_dgamma
def _psi2compDer(dL_dpsi2, variance, lengthscale, Z, mu, S, gamma):
"""
Z - MxQ
mu - NxQ
S - NxQ
gamma - NxQ
dL_dpsi2 - MxM
"""
# here are the "statistics" for psi2
# Produced the derivatives w.r.t. psi2:
# _dL_dvariance 1
# _dL_dlengthscale Q
# _dL_dZ MxQ
# _dL_dgamma NxQ
# _dL_dmu NxQ
# _dL_dS NxQ
lengthscale2 = np.square(lengthscale)
_psi2_Zhat = 0.5 * (Z[:, None, :] + Z[None, :, :]) # M,M,Q
_psi2_Zdist = 0.5 * (Z[:, None, :] - Z[None, :, :]) # M,M,Q
_psi2_Zdist_sq = np.square(_psi2_Zdist / lengthscale) # M,M,Q
_psi2_Z_sq_sum = (np.square(Z[:,None,:])+np.square(Z[None,:,:]))/lengthscale2 # MxMxQ
# psi2
_psi2_denom = 2.*S / lengthscale2 + 1. # NxQ
_psi2_denom_sqrt = np.sqrt(_psi2_denom)
_psi2_mudist = mu[:,None,None,:]-_psi2_Zhat #N,M,M,Q
_psi2_mudist_sq = np.square(_psi2_mudist)/(lengthscale2*_psi2_denom[:,None,None,:])
_psi2_common = gamma/(lengthscale2 * _psi2_denom * _psi2_denom_sqrt) # NxQ
_psi2_exponent1 = -_psi2_Zdist_sq -_psi2_mudist_sq -0.5*np.log(_psi2_denom[:,None,None,:])+np.log(gamma[:,None,None,:]) #N,M,M,Q
_psi2_exponent2 = np.log(1.-gamma[:,None,None,:]) - 0.5*(_psi2_Z_sq_sum) # NxMxMxQ
_psi2_exponent_max = np.maximum(_psi2_exponent1, _psi2_exponent2)
_psi2_exponent = _psi2_exponent_max+np.log(np.exp(_psi2_exponent1-_psi2_exponent_max) + np.exp(_psi2_exponent2-_psi2_exponent_max))
_psi2_exp_sum = _psi2_exponent.sum(axis=-1) #NxM
_psi2_q = variance*variance * np.exp(_psi2_exp_sum[:,:,:,None]-_psi2_exponent) # NxMxMxQ
_psi2_exp_dist_sq = np.exp(-_psi2_Zdist_sq -_psi2_mudist_sq) # NxMxMxQ
_psi2_exp_Z = np.exp(-0.5*_psi2_Z_sq_sum) # MxMxQ
_psi2 = np.square(variance) * np.exp(_psi2_exp_sum) # N,M,M
_dpsi2_dvariance = 2. * _psi2/variance # NxMxM
_dpsi2_dgamma = _psi2_q * (_psi2_exp_dist_sq/_psi2_denom_sqrt - _psi2_exp_Z) # NxMxMxQ
_dpsi2_dmu = _psi2_q * (-2.*_psi2_common*_psi2_mudist * _psi2_exp_dist_sq) # NxMxMxQ
_dpsi2_dS = _psi2_q * (_psi2_common * (2.*_psi2_mudist_sq - 1.) * _psi2_exp_dist_sq) # NxMxMxQ
_dpsi2_dZ = 2.*_psi2_q * (_psi2_common*(-_psi2_Zdist*_psi2_denom+_psi2_mudist)*_psi2_exp_dist_sq - (1-gamma[:,None,None,:])*Z[:,None,:]/lengthscale2*_psi2_exp_Z) # NxMxMxQ
_dpsi2_dlengthscale = 2.*lengthscale* _psi2_q * (_psi2_common*(S[:,None,None,:]/lengthscale2+_psi2_Zdist_sq*_psi2_denom+_psi2_mudist_sq)*_psi2_exp_dist_sq+(1-gamma[:,None,None,:])*_psi2_Z_sq_sum*0.5/lengthscale2*_psi2_exp_Z) # NxMxMxQ
_psi2 = variance*variance * (np.exp(_psi2_exp_sum).sum(axis=0)) # MxM
_dL_dvariance = np.einsum('mo,mo->',dL_dpsi2,_psi2)*2./variance
_dL_dgamma = np.einsum('mo,nmoq,nmoq->nq',dL_dpsi2,_psi2_q,(_psi2_exp_dist_sq/_psi2_denom_sqrt[:,None,None,:] - _psi2_exp_Z))
_dL_dmu = -2.*np.einsum('mo,nmoq,nq,nmoq,nmoq->nq',dL_dpsi2,_psi2_q,_psi2_common,_psi2_mudist,_psi2_exp_dist_sq)
_dL_dS = np.einsum('mo,nmoq,nq,nmoq,nmoq->nq',dL_dpsi2,_psi2_q, _psi2_common, (2.*_psi2_mudist_sq-1.), _psi2_exp_dist_sq)
_dL_dZ = 2.*np.einsum('mo,nmoq,nmoq->mq',dL_dpsi2,_psi2_q,(_psi2_common[:,None,None,:]*(-_psi2_Zdist*_psi2_denom[:,None,None,:]+_psi2_mudist)*_psi2_exp_dist_sq - (1-gamma[:,None,None,:])*Z[:,None,:]/lengthscale2*_psi2_exp_Z))
# print _psi2_common[:,None,None,:]*(S[:,None,None,:]/lengthscale2+_psi2_Zdist_sq*_psi2_denom[:,None,None,:]+_psi2_mudist_sq)*_psi2_exp_dist_sq #+(1-gamma[:,None,None,:])*_psi2_Z_sq_sum*0.5/lengthscale2*_psi2_exp_Z)
_dL_dlengthscale = 2.*lengthscale* np.einsum('mo,nmoq,nmoq->q',dL_dpsi2,_psi2_q,(_psi2_common[:,None,None,:]*(S[:,None,None,:]/lengthscale2+_psi2_Zdist_sq*_psi2_denom[:,None,None,:]+_psi2_mudist_sq)*_psi2_exp_dist_sq+(1-gamma[:,None,None,:])*_psi2_Z_sq_sum*0.5/lengthscale2*_psi2_exp_Z))
# _dpsi2_dvariance = 2. * _psi2/variance # NxMxM
# _dpsi2_dgamma = _psi2_q * (_psi2_exp_dist_sq/_psi2_denom_sqrt - _psi2_exp_Z) # NxMxMxQ
# _dpsi2_dmu = _psi2_q * (-2.*_psi2_common*_psi2_mudist * _psi2_exp_dist_sq) # NxMxMxQ
# _dpsi2_dS = _psi2_q * (_psi2_common * (2.*_psi2_mudist_sq - 1.) * _psi2_exp_dist_sq) # NxMxMxQ
# _dpsi2_dZ = 2.*_psi2_q * (_psi2_common*(-_psi2_Zdist*_psi2_denom+_psi2_mudist)*_psi2_exp_dist_sq - (1-gamma[:,None,None,:])*Z[:,None,:]/lengthscale2*_psi2_exp_Z) # NxMxMxQ
# _dpsi2_dlengthscale = 2.*lengthscale* _psi2_q * (_psi2_common*(S[:,None,None,:]/lengthscale2+_psi2_Zdist_sq*_psi2_denom+_psi2_mudist_sq)*_psi2_exp_dist_sq+(1-gamma[:,None,None,:])*_psi2_Z_sq_sum*0.5/lengthscale2*_psi2_exp_Z) # NxMxMxQ
return _psi2, _dpsi2_dvariance, _dpsi2_dgamma, _dpsi2_dmu, _dpsi2_dS, _dpsi2_dZ, _dpsi2_dlengthscale
return _dL_dvariance, _dL_dlengthscale, _dL_dZ, _dL_dmu, _dL_dS, _dL_dgamma

View file

@ -42,9 +42,11 @@ class RBF(Stationary):
#---------------------------------------#
def psi0(self, Z, variational_posterior):
if self.useGPU:
if isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
if isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
if self.useGPU:
return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)[0]
else:
return ssrbf_psi_comp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)[0]
else:
return self.Kdiag(variational_posterior.mean)
@ -53,7 +55,7 @@ class RBF(Stationary):
if self.useGPU:
return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)[1]
else:
psi1, _, _, _, _, _, _ = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
return ssrbf_psi_comp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)[1]
else:
_, _, _, psi1 = self._psi1computations(Z, variational_posterior)
return psi1
@ -63,7 +65,7 @@ class RBF(Stationary):
if self.useGPU:
return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)[2]
else:
psi2, _, _, _, _, _, _ = ssrbf_psi_comp._psi2computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
return ssrbf_psi_comp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)[2]
else:
_, _, _, _, psi2 = self._psi2computations(Z, variational_posterior)
return psi2
@ -74,26 +76,30 @@ class RBF(Stationary):
if self.useGPU:
self.psicomp.update_gradients_expectations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)
else:
# dL_dvar, dL_dlengscale, dL_dZ, dL_dgamma, dL_dmu, dL_dS = ssrbf_psi_comp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)
dL_dvar, dL_dlengscale, _, _, _, _ = ssrbf_psi_comp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)
self.variance.gradient = dL_dvar
self.lengthscale.gradient = dL_dlengscale
_, _dpsi1_dvariance, _, _, _, _, _dpsi1_dlengthscale = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
_, _dpsi2_dvariance, _, _, _, _, _dpsi2_dlengthscale = ssrbf_psi_comp._psi2computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
#contributions from psi0:
self.variance.gradient = np.sum(dL_dpsi0)
#from psi1
self.variance.gradient += np.sum(dL_dpsi1 * _dpsi1_dvariance)
if self.ARD:
self.lengthscale.gradient = (dL_dpsi1[:,:,None]*_dpsi1_dlengthscale).reshape(-1,self.input_dim).sum(axis=0)
else:
self.lengthscale.gradient = (dL_dpsi1[:,:,None]*_dpsi1_dlengthscale).sum()
#from psi2
self.variance.gradient += (dL_dpsi2 * _dpsi2_dvariance).sum()
if self.ARD:
self.lengthscale.gradient += (dL_dpsi2[:,:,:,None] * _dpsi2_dlengthscale).reshape(-1,self.input_dim).sum(axis=0)
else:
self.lengthscale.gradient += (dL_dpsi2[:,:,:,None] * _dpsi2_dlengthscale).sum()
# _, _dpsi1_dvariance, _, _, _, _, _dpsi1_dlengthscale = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
# _, _dpsi2_dvariance, _, _, _, _, _dpsi2_dlengthscale = ssrbf_psi_comp._psi2computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
#
# #contributions from psi0:
# self.variance.gradient = np.sum(dL_dpsi0)
#
# #from psi1
# self.variance.gradient += np.sum(dL_dpsi1 * _dpsi1_dvariance)
# if self.ARD:
# self.lengthscale.gradient = (dL_dpsi1[:,:,None]*_dpsi1_dlengthscale).reshape(-1,self.input_dim).sum(axis=0)
# else:
# self.lengthscale.gradient = (dL_dpsi1[:,:,None]*_dpsi1_dlengthscale).sum()
#
# #from psi2
# self.variance.gradient += (dL_dpsi2 * _dpsi2_dvariance).sum()
# if self.ARD:
# self.lengthscale.gradient += (dL_dpsi2[:,:,:,None] * _dpsi2_dlengthscale).reshape(-1,self.input_dim).sum(axis=0)
# else:
# self.lengthscale.gradient += (dL_dpsi2[:,:,:,None] * _dpsi2_dlengthscale).sum()
elif isinstance(variational_posterior, variational.NormalPosterior):
l2 = self.lengthscale**2
@ -126,22 +132,25 @@ class RBF(Stationary):
else:
raise ValueError, "unknown distriubtion received for psi-statistics"
def gradients_Z_expectations(self, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
def gradients_Z_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
# Spike-and-Slab GPLVM
if isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
if self.useGPU:
return self.psicomp.gradients_Z_expectations(dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)
else:
_, _, _, _, _, _dpsi1_dZ, _ = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
_, _, _, _, _, _dpsi2_dZ, _ = ssrbf_psi_comp._psi2computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
#psi1
grad = (dL_dpsi1[:, :, None] * _dpsi1_dZ).sum(axis=0)
#psi2
grad += (dL_dpsi2[:, :, :, None] * _dpsi2_dZ).sum(axis=0).sum(axis=1)
return grad
_, _, dL_dZ, _, _, _ = ssrbf_psi_comp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)
return dL_dZ
# _, _, _, _, _, _dpsi1_dZ, _ = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
# _, _, _, _, _, _dpsi2_dZ, _ = ssrbf_psi_comp._psi2computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
#
# #psi1
# grad = (dL_dpsi1[:, :, None] * _dpsi1_dZ).sum(axis=0)
#
# #psi2
# grad += (dL_dpsi2[:, :, :, None] * _dpsi2_dZ).sum(axis=0).sum(axis=1)
#
# return grad
elif isinstance(variational_posterior, variational.NormalPosterior):
l2 = self.lengthscale **2
@ -167,26 +176,29 @@ class RBF(Stationary):
if isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
if self.useGPU:
return self.psicomp.gradients_qX_expectations(dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)
else:
ndata = variational_posterior.mean.shape[0]
_, _, _dpsi1_dgamma, _dpsi1_dmu, _dpsi1_dS, _, _ = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
_, _, _dpsi2_dgamma, _dpsi2_dmu, _dpsi2_dS, _, _ = ssrbf_psi_comp._psi2computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
#psi1
grad_mu = (dL_dpsi1[:, :, None] * _dpsi1_dmu).sum(axis=1)
grad_S = (dL_dpsi1[:, :, None] * _dpsi1_dS).sum(axis=1)
grad_gamma = (dL_dpsi1[:,:,None] * _dpsi1_dgamma).sum(axis=1)
#psi2
grad_mu += (dL_dpsi2[:, :, :, None] * _dpsi2_dmu).reshape(ndata,-1,self.input_dim).sum(axis=1)
grad_S += (dL_dpsi2[:, :, :, None] * _dpsi2_dS).reshape(ndata,-1,self.input_dim).sum(axis=1)
grad_gamma += (dL_dpsi2[:,:,:, None] * _dpsi2_dgamma).reshape(ndata,-1,self.input_dim).sum(axis=1)
if self.group_spike_prob:
grad_gamma[:] = grad_gamma.mean(axis=0)
return grad_mu, grad_S, grad_gamma
else:
_, _, _, dL_dmu, dL_dS, dL_dgamma = ssrbf_psi_comp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)
return dL_dmu, dL_dS, dL_dgamma
# ndata = variational_posterior.mean.shape[0]
#
# _, _, _dpsi1_dgamma, _dpsi1_dmu, _dpsi1_dS, _, _ = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
# _, _, _dpsi2_dgamma, _dpsi2_dmu, _dpsi2_dS, _, _ = ssrbf_psi_comp._psi2computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
#
# #psi1
# grad_mu = (dL_dpsi1[:, :, None] * _dpsi1_dmu).sum(axis=1)
# grad_S = (dL_dpsi1[:, :, None] * _dpsi1_dS).sum(axis=1)
# grad_gamma = (dL_dpsi1[:,:,None] * _dpsi1_dgamma).sum(axis=1)
#
# #psi2
# grad_mu += (dL_dpsi2[:, :, :, None] * _dpsi2_dmu).reshape(ndata,-1,self.input_dim).sum(axis=1)
# grad_S += (dL_dpsi2[:, :, :, None] * _dpsi2_dS).reshape(ndata,-1,self.input_dim).sum(axis=1)
# grad_gamma += (dL_dpsi2[:,:,:, None] * _dpsi2_dgamma).reshape(ndata,-1,self.input_dim).sum(axis=1)
#
# if self.group_spike_prob:
# grad_gamma[:] = grad_gamma.mean(axis=0)
#
# return grad_mu, grad_S, grad_gamma
elif isinstance(variational_posterior, variational.NormalPosterior):

View file

@ -25,7 +25,7 @@ class Static(Kern):
def gradients_X_diag(self, dL_dKdiag, X):
return np.zeros(X.shape)
def gradients_Z_expectations(self, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
def gradients_Z_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
return np.zeros(Z.shape)
def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):

View file

@ -11,7 +11,7 @@ from ..likelihoods import Gaussian
from ..inference.optimization import SCG
from ..util import linalg
from ..core.parameterization.variational import SpikeAndSlabPrior, SpikeAndSlabPosterior
from ..inference.latent_function_inference.var_dtc_parallel import update_gradients
from ..inference.latent_function_inference.var_dtc_parallel import update_gradients, VarDTC_minibatch
from ..inference.latent_function_inference.var_dtc_gpu import VarDTC_GPU
@ -41,7 +41,7 @@ class SSGPLVM(SparseGP):
if X_variance is None: # The variance of the variational approximation (S)
X_variance = np.random.uniform(0,.1,X.shape)
gamma = np.empty_like(X) # The posterior probabilities of the binary variable in the variational approximation
gamma = np.empty_like(X, order='F') # The posterior probabilities of the binary variable in the variational approximation
gamma[:] = 0.5 + 0.01 * np.random.randn(X.shape[0], input_dim)
if group_spike:
@ -60,12 +60,15 @@ class SSGPLVM(SparseGP):
pi = np.empty((input_dim))
pi[:] = 0.5
self.variational_prior = SpikeAndSlabPrior(pi=pi) # the prior probability of the latent binary variable b
X = np.asfortranarray(X)
X_variance = np.asfortranarray(X_variance)
gamma = np.asfortranarray(gamma)
X = SpikeAndSlabPosterior(X, X_variance, gamma)
if group_spike:
kernel.group_spike_prob = True
self.variational_prior.group_spike_prob = True
SparseGP.__init__(self, X, Y, Z, kernel, likelihood, inference_method, name, **kwargs)
self.add_parameter(self.X, index=0)
@ -76,7 +79,7 @@ class SSGPLVM(SparseGP):
X.mean.gradient, X.variance.gradient, X.binary_prob.gradient = X_grad
def parameters_changed(self):
if isinstance(self.inference_method, VarDTC_GPU):
if isinstance(self.inference_method, VarDTC_GPU) or isinstance(self.inference_method, VarDTC_minibatch):
update_gradients(self)
return

View file

@ -8,7 +8,7 @@ import numpy as np
from GPy.util.pca import pca
def initialize_latent(init, input_dim, Y):
Xr = np.random.randn(Y.shape[0], input_dim)
Xr = np.asfortranarray(np.random.randn(Y.shape[0], input_dim))
if init == 'PCA':
p = pca(Y)
PC = p.project(Y, min(input_dim, Y.shape[1]))
@ -20,4 +20,4 @@ def initialize_latent(init, input_dim, Y):
Xr -= Xr.mean(0)
Xr /= Xr.var(0)
return Xr, var/var.max()
return Xr, var/var.max()

View file

@ -123,7 +123,7 @@ def dtrtrs(A, B, lower=1, trans=0, unitdiag=0):
:returns:
"""
A = force_F_ordered(A)
A = np.asfortranarray(A)
#Note: B does not seem to need to be F ordered!
return lapack.dtrtrs(A, B, lower=lower, trans=trans, unitdiag=unitdiag)