Merge branch 'devel' of github.com:SheffieldML/GPy into devel

Author: Alan Saul
Date:   2015-05-26 16:02:31 +01:00
Commit: f43df8798a
20 changed files with 15370 additions and 857 deletions

View file

@@ -208,6 +208,7 @@ class GP(Model):
         Kxx = kern.Kdiag(_Xnew)
         var = Kxx - np.sum(WiKx*Kx, 0)
         var = var.reshape(-1, 1)
+        var[var<0.] = 0.

         #force mu to be a column vector
         if len(mu.shape)==1: mu = mu[:,None]

@@ -229,13 +230,14 @@ class GP(Model):
        :param Y_metadata: metadata about the predicting point to pass to the likelihood
        :param kern: The kernel to use for prediction (defaults to the model
                     kern). this is useful for examining e.g. subprocesses.
-       :returns: (mean, var, lower_upper):
+       :returns: (mean, var):
           mean: posterior mean, a Numpy array, Nnew x self.input_dim
           var: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise
-          lower_upper: lower and upper boundaries of the 95% confidence intervals, Numpy arrays, Nnew x self.input_dim

          If full_cov and self.input_dim > 1, the return shape of var is Nnew x Nnew x self.input_dim. If self.input_dim == 1, the return shape is Nnew x Nnew.
          This is to allow for different normalizations of the output dimensions.

+         Note: If you want the predictive quantiles (e.g. 95% confidence interval) use :py:func:`~GPy.core.gp.GP.predict_quantiles`.
        """
        #predict the latent function values
        mu, var = self._raw_predict(Xnew, full_cov=full_cov, kern=kern)

@@ -255,7 +257,7 @@ class GP(Model):
        :param quantiles: tuple of quantiles, default is (2.5, 97.5) which is the 95% interval
        :type quantiles: tuple
        :returns: list of quantiles for each X and predictive quantiles for interval combination
-       :rtype: [np.ndarray (Xnew x self.input_dim), np.ndarray (Xnew x self.input_dim)]
+       :rtype: [np.ndarray (Xnew x self.output_dim), np.ndarray (Xnew x self.output_dim)]
        """
        m, v = self._raw_predict(X, full_cov=False)
        if self.normalizer is not None:
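The confidence bounds are gone from predict's return value and now come from predict_quantiles. A minimal sketch of the new calling pattern (model and data are illustrative):

import numpy as np
import GPy

X = np.random.rand(20, 1)
Y = np.sin(X) + 0.05 * np.random.randn(20, 1)
m = GPy.models.GPRegression(X, Y)

Xnew = np.linspace(0, 1, 50)[:, None]
mean, var = m.predict(Xnew)        # two return values now, not three
lower, upper = m.predict_quantiles(Xnew, quantiles=(2.5, 97.5))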

View file

@@ -76,7 +76,7 @@ class Model(Parameterized):
             jobs = []
             pool = mp.Pool(processes=num_processes)
             for i in range(num_restarts):
-                self.randomize()
+                if i>0: self.randomize()
                 job = pool.apply_async(opt_wrapper, args=(self,), kwds=kwargs)
                 jobs.append(job)

@@ -90,7 +90,7 @@ class Model(Parameterized):
         for i in range(num_restarts):
             try:
                 if not parallel:
-                    self.randomize()
+                    if i>0: self.randomize()
                     self.optimize(**kwargs)
                 else:
                     self.optimization_runs.append(jobs[i].get())
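With this change the first restart optimizes from the model's current parameters and only subsequent restarts randomize. Usage sketch (m is any GPy model, e.g. the regression above):

m.optimize_restarts(num_restarts=4)
# run 1 starts from m's current parameter values
# runs 2-4 start from randomized values; all runs collect in m.optimization_runs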

View file

@@ -38,6 +38,11 @@ class Param(Parameterizable, ObsAr):
     Fixing parameters will fix them to the value they are right now. If you change
     the fixed value, it will be fixed to the new value!

+    Important Note:
+    Multilevel indexing (e.g. self[:2][1:]) is not supported and might lead to unexpected behaviour.
+    Try to index in one go, using boolean indexing or the numpy builtin
+    np.ix_ function.

     See :py:class:`GPy.core.parameterized.Parameterized` for more details on constraining etc.
     """

View file

@@ -36,8 +36,9 @@ class NormalPrior(VariationalPrior):
         variational_posterior.variance.gradient -= (1. - (1. / (variational_posterior.variance))) * 0.5

 class SpikeAndSlabPrior(VariationalPrior):
-    def __init__(self, pi=None, learnPi=False, variance = 1.0, name='SpikeAndSlabPrior', **kw):
+    def __init__(self, pi=None, learnPi=False, variance = 1.0, group_spike=False, name='SpikeAndSlabPrior', **kw):
         super(SpikeAndSlabPrior, self).__init__(name=name, **kw)
+        self.group_spike = group_spike
         self.variance = Param('variance',variance)
         self.learnPi = learnPi
         if learnPi:

@@ -50,7 +51,10 @@ class SpikeAndSlabPrior(VariationalPrior):
     def KL_divergence(self, variational_posterior):
         mu = variational_posterior.mean
         S = variational_posterior.variance
-        gamma = variational_posterior.gamma.values
+        if self.group_spike:
+            gamma = variational_posterior.gamma.values[0]
+        else:
+            gamma = variational_posterior.gamma.values
         if len(self.pi.shape)==2:
             idx = np.unique(variational_posterior.gamma._raveled_index()/gamma.shape[-1])
             pi = self.pi[idx]

@@ -65,14 +69,21 @@ class SpikeAndSlabPrior(VariationalPrior):
     def update_gradients_KL(self, variational_posterior):
         mu = variational_posterior.mean
         S = variational_posterior.variance
-        gamma = variational_posterior.gamma.values
+        if self.group_spike:
+            gamma = variational_posterior.gamma.values[0]
+        else:
+            gamma = variational_posterior.gamma.values
         if len(self.pi.shape)==2:
             idx = np.unique(variational_posterior.gamma._raveled_index()/gamma.shape[-1])
             pi = self.pi[idx]
         else:
             pi = self.pi
-        variational_posterior.binary_prob.gradient -= np.log((1-pi)/pi*gamma/(1.-gamma))+((np.square(mu)+S)/self.variance-np.log(S)+np.log(self.variance)-1.)/2.
+        if self.group_spike:
+            dgamma = np.log((1-pi)/pi*gamma/(1.-gamma))/variational_posterior.num_data
+        else:
+            dgamma = np.log((1-pi)/pi*gamma/(1.-gamma))
+        variational_posterior.binary_prob.gradient -= dgamma+((np.square(mu)+S)/self.variance-np.log(S)+np.log(self.variance)-1.)/2.
         mu.gradient -= gamma*mu/self.variance
         S.gradient -= (1./self.variance - 1./S) * gamma /2.
         if self.learnPi:

@@ -154,13 +165,31 @@ class SpikeAndSlabPosterior(VariationalPosterior):
     '''
     The SpikeAndSlab distribution for variational approximations.
     '''
-    def __init__(self, means, variances, binary_prob, name='latent space'):
+    def __init__(self, means, variances, binary_prob, group_spike=False, sharedX=False, name='latent space'):
         """
         binary_prob : the probability of the distribution on the slab part.
         """
         super(SpikeAndSlabPosterior, self).__init__(means, variances, name)
-        self.gamma = Param("binary_prob",binary_prob,Logistic(0.,1.))
-        self.link_parameter(self.gamma)
+        self.group_spike = group_spike
+        self.sharedX = sharedX
+        if sharedX:
+            self.mean.fix(warning=False)
+            self.variance.fix(warning=False)
+        if group_spike:
+            self.gamma_group = Param("binary_prob_group",binary_prob.mean(axis=0),Logistic(1e-6,1.-1e-6))
+            self.gamma = Param("binary_prob",binary_prob, __fixed__)
+            self.link_parameters(self.gamma_group,self.gamma)
+        else:
+            self.gamma = Param("binary_prob",binary_prob,Logistic(1e-6,1.-1e-6))
+            self.link_parameter(self.gamma)
+
+    def propogate_val(self):
+        if self.group_spike:
+            self.gamma.values[:] = self.gamma_group.values
+
+    def collate_gradient(self):
+        if self.group_spike:
+            self.gamma_group.gradient = self.gamma.gradient.reshape(self.gamma.shape).sum(axis=0)

     def set_gradients(self, grad):
         self.mean.gradient, self.variance.gradient, self.gamma.gradient = grad

@@ -179,15 +208,15 @@ class SpikeAndSlabPosterior(VariationalPosterior):
             n.parameters[dc['variance']._parent_index_] = dc['variance']
             n.parameters[dc['binary_prob']._parent_index_] = dc['binary_prob']
             n._gradient_array_ = None
-            oversize = self.size - self.mean.size - self.variance.size
-            n.size = n.mean.size + n.variance.size + oversize
+            oversize = self.size - self.mean.size - self.variance.size - self.gamma.size
+            n.size = n.mean.size + n.variance.size + n.gamma.size + oversize
             n.ndim = n.mean.ndim
             n.shape = n.mean.shape
             n.num_data = n.mean.shape[0]
             n.input_dim = n.mean.shape[1] if n.ndim != 1 else 1
             return n
         else:
-            return super(VariationalPrior, self).__getitem__(s)
+            return super(SpikeAndSlabPosterior, self).__getitem__(s)

     def plot(self, *args, **kwargs):
         """

View file

@@ -46,7 +46,7 @@ class SVGP(SparseGP):
         num_latent_functions = Y.shape[1]
         self.m = Param('q_u_mean', np.zeros((self.num_inducing, num_latent_functions)))
-        chol = choleskies.triang_to_flat(np.tile(np.eye(self.num_inducing)[:,:,None], (1,1,num_latent_functions)))
+        chol = choleskies.triang_to_flat(np.tile(np.eye(self.num_inducing)[None,:,:], (num_latent_functions, 1,1)))
         self.chol = Param('q_u_chol', chol)
         self.link_parameter(self.chol)
         self.link_parameter(self.m)
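The flip from an (M, M, D) stack of Cholesky factors to (D, M, M) runs through this commit (see the choleskies changes below). A quick numpy check of the new construction:

import numpy as np

M, D = 3, 2   # inducing points, latent functions (illustrative sizes)
L_old = np.tile(np.eye(M)[:, :, None], (1, 1, D))   # old layout: M x M x D
L_new = np.tile(np.eye(M)[None, :, :], (D, 1, 1))   # new layout: D x M x M
assert L_old.shape == (M, M, D) and L_new.shape == (D, M, M)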

View file

@@ -5,9 +5,10 @@ from __future__ import print_function
 import numpy as np
 import sys
 import time
+import datetime

 def exponents(fnow, current_grad):
-    exps = [np.abs(np.float(fnow)), current_grad]
+    exps = [np.abs(np.float(fnow)), 1 if current_grad is np.nan else current_grad]
     return np.sign(exps) * np.log10(exps).astype(int)

 class VerboseOptimization(object):

@@ -23,6 +24,7 @@ class VerboseOptimization(object):
             self.model.add_observer(self, self.print_status)
             self.status = 'running'
             self.clear = clear_after_finish
+            self.deltat = .2

             self.update()

@@ -44,25 +46,25 @@
                 self.hor_align = FlexBox(children = [left_col, right_col], width='100%', orientation='horizontal')

                 display(self.hor_align)

                 try:
                     self.text.set_css('width', '100%')
                     left_col.set_css({
                         'padding': '2px',
                         'width': "100%",
                     })
                     right_col.set_css({
                         'padding': '2px',
                     })
                     self.hor_align.set_css({
                         'width': "100%",
                     })
                     self.hor_align.remove_class('vbox')
                     self.hor_align.add_class('hbox')
                     left_col.add_class("box-flex1")
                     right_col.add_class('box-flex0')

@@ -74,16 +76,31 @@ class VerboseOptimization(object):
             else:
                 self.exps = exponents(self.fnow, self.current_gradient)
             print('Running {} Code:'.format(self.opt_name))
-            print('  {3:7s}   {0:{mi}s}   {1:11s}   {2:11s}'.format("i", "f", "|g|", "secs", mi=self.len_maxiters))
+            print('  {3:7s}   {0:{mi}s}   {1:11s}   {2:11s}'.format("i", "f", "|g|", "runtime", mi=self.len_maxiters))

     def __enter__(self):
         self.start = time.time()
         return self

-    def print_out(self):
+    def print_out(self, seconds):
+        if seconds<60:
+            ms = (seconds%1)*100
+            self.timestring = "{s:0>2d}s{ms:0>2d}".format(s=int(seconds), ms=int(ms))
+        else:
+            m, s = divmod(seconds, 60)
+            if m>59:
+                h, m = divmod(m, 60)
+                if h>23:
+                    d, h = divmod(h, 24)
+                    self.timestring = '{d:0>2d}d{h:0>2d}h{m:0>2d}'.format(m=int(m), h=int(h), d=int(d))
+                else:
+                    self.timestring = '{h:0>2d}h{m:0>2d}m{s:0>2d}'.format(m=int(m), s=int(s), h=int(h))
+            else:
+                ms = (seconds%1)*100
+                self.timestring = '{m:0>2d}m{s:0>2d}s{ms:0>2d}'.format(m=int(m), s=int(s), ms=int(ms))
         if self.ipython_notebook:
             names_vals = [['optimizer', "{:s}".format(self.opt_name)],
-                          ['runtime [s]', "{:> g}".format(time.time()-self.start)],
+                          ['runtime', "{:>s}".format(self.timestring)],
                           ['evaluation', "{:>0{l}}".format(self.iteration, l=self.len_maxiters)],
                           ['objective', "{: > 12.3E}".format(self.fnow)],
                           ['||gradient||', "{: >+12.3E}".format(float(self.current_gradient))],

@@ -120,14 +137,18 @@ class VerboseOptimization(object):
                 if b:
                     self.exps = n_exps
                 print('\r', end=' ')
-                print('{3:> 7.2g}  {0:>0{mi}g}  {1:> 12e}  {2:> 12e}'.format(self.iteration, float(self.fnow), float(self.current_gradient), time.time()-self.start, mi=self.len_maxiters), end=' ') # print 'Iteration:', iteration, ' Objective:', fnow, '  Scale:', beta, '\r',
+                print('{3:}  {0:>0{mi}g}  {1:> 12e}  {2:> 12e}'.format(self.iteration, float(self.fnow), float(self.current_gradient), "{:>8s}".format(self.timestring), mi=self.len_maxiters), end=' ') # print 'Iteration:', iteration, ' Objective:', fnow, '  Scale:', beta, '\r',
                 sys.stdout.flush()

     def print_status(self, me, which=None):
         self.update()
+        seconds = time.time()-self.start
         #sys.stdout.write(" "*len(self.message))
-        self.print_out()
+        self.deltat += seconds
+        if self.deltat > .2:
+            self.print_out(seconds)
+            self.deltat = 0

         self.iteration += 1

@@ -153,12 +174,12 @@ class VerboseOptimization(object):
         if self.verbose:
             self.stop = time.time()
             self.model.remove_observer(self)
-            self.print_out()
+            self.print_out(self.stop - self.start)

             if not self.ipython_notebook:
                 print()
-                print('Optimization finished in {0:.5g} Seconds'.format(self.stop-self.start))
+                print('Runtime: {}'.format("{:>9s}".format(self.timestring)))
                 print('Optimization status: {0}'.format(self.status))
                 print()
             elif self.clear:
                 self.hor_align.close()
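Worked examples of the new timestring branches above (a standalone sketch):

for seconds in (45.67, 125.5, 3725.4, 93784.0):
    if seconds < 60:
        ts = "{s:0>2d}s{ms:0>2d}".format(s=int(seconds), ms=int((seconds % 1) * 100))
    else:
        m, s = divmod(seconds, 60)
        if m > 59:
            h, m = divmod(m, 60)
            if h > 23:
                d, h = divmod(h, 24)
                ts = '{d:0>2d}d{h:0>2d}h{m:0>2d}'.format(m=int(m), h=int(h), d=int(d))
            else:
                ts = '{h:0>2d}h{m:0>2d}m{s:0>2d}'.format(m=int(m), s=int(s), h=int(h))
        else:
            ts = '{m:0>2d}m{s:0>2d}s{ms:0>2d}'.format(m=int(m), s=int(s), ms=int((seconds % 1) * 100))
    print(ts)   # -> 45s67, 02m05s50, 01h02m05, 01d02h03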

View file

@@ -353,13 +353,13 @@ def ssgplvm_simulation(optimize=True, verbose=1,
     Y = Ylist[0]
     k = kern.Linear(Q, ARD=True)  # + kern.white(Q, _np.exp(-2)) # + kern.bias(Q)
     # k = kern.RBF(Q, ARD=True, lengthscale=10.)
-    m = SSGPLVM(Y, Q, init="pca", num_inducing=num_inducing, kernel=k)
+    m = SSGPLVM(Y, Q, init="rand", num_inducing=num_inducing, kernel=k, group_spike=True)
     m.X.variance[:] = _np.random.uniform(0, .01, m.X.shape)
-    m.likelihood.variance = .1
+    m.likelihood.variance = .01

     if optimize:
         print("Optimizing model:")
-        m.optimize('scg', messages=verbose, max_iters=max_iters,
+        m.optimize('bfgs', messages=verbose, max_iters=max_iters,
                    gtol=.05)
     if plot:
         m.X.plot("SSGPLVM Latent Space 1D")

View file

@@ -3,6 +3,7 @@ from ...util import linalg
 from ...util import choleskies
 import numpy as np
 from .posterior import Posterior
+from scipy.linalg.blas import dgemm, dsymm, dtrmm

 class SVGP(LatentFunctionInference):

@@ -16,16 +17,13 @@ class SVGP(LatentFunctionInference):
         S = np.empty((num_outputs, num_inducing, num_inducing))
-        [np.dot(L[:,:,i], L[:,:,i].T, S[i,:,:]) for i in range(num_outputs)]
-        S = S.swapaxes(0,2)
+        [np.dot(L[i,:,:], L[i,:,:].T, S[i,:,:]) for i in range(num_outputs)]
         #Si,_ = linalg.dpotri(np.asfortranarray(L), lower=1)
         Si = choleskies.multiple_dpotri(L)
-        logdetS = np.array([2.*np.sum(np.log(np.abs(np.diag(L[:,:,i])))) for i in range(L.shape[-1])])
+        logdetS = np.array([2.*np.sum(np.log(np.abs(np.diag(L[i,:,:])))) for i in range(L.shape[0])])

         if np.any(np.isinf(Si)):
             raise ValueError("Cholesky representation unstable")
-        #S = S + np.eye(S.shape[0])*1e-5*np.max(np.max(S))
-        #Si, Lnew, _,_ = linalg.pdinv(S)

         #compute mean function stuff
         if mean_function is not None:

@@ -35,27 +33,31 @@ class SVGP(LatentFunctionInference):
             prior_mean_u = np.zeros((num_inducing, num_outputs))
             prior_mean_f = np.zeros((num_data, num_outputs))

         #compute kernel related stuff
         Kmm = kern.K(Z)
-        Knm = kern.K(X, Z)
+        Kmn = kern.K(Z, X)
         Knn_diag = kern.Kdiag(X)
-        Kmmi, Lm, Lmi, logdetKmm = linalg.pdinv(Kmm)
+        Lm = linalg.jitchol(Kmm)
+        logdetKmm = 2.*np.sum(np.log(np.diag(Lm)))
+        Kmmi, _ = linalg.dpotri(Lm)

         #compute the marginal means and variances of q(f)
-        A = np.dot(Knm, Kmmi)
-        mu = prior_mean_f + np.dot(A, q_u_mean - prior_mean_u)
-        #v = Knn_diag[:,None] - np.sum(A*Knm,1)[:,None] + np.sum(A[:,:,None] * np.einsum('ij,jlk->ilk', A, S),1)
-        v = Knn_diag[:,None] - np.sum(A*Knm,1)[:,None] + np.sum(A[:,:,None] * linalg.ij_jlk_to_ilk(A, S),1)
+        A, _ = linalg.dpotrs(Lm, Kmn)
+        mu = prior_mean_f + np.dot(A.T, q_u_mean - prior_mean_u)
+        v = np.empty((num_data, num_outputs))
+        for i in range(num_outputs):
+            tmp = dtrmm(1.0,L[i].T, A, lower=0, trans_a=0)
+            v[:,i] = np.sum(np.square(tmp),0)
+        v += (Knn_diag - np.sum(A*Kmn,0))[:,None]

         #compute the KL term
         Kmmim = np.dot(Kmmi, q_u_mean)
-        KLs = -0.5*logdetS -0.5*num_inducing + 0.5*logdetKmm + 0.5*np.sum(Kmmi[:,:,None]*S,0).sum(0) + 0.5*np.sum(q_u_mean*Kmmim,0)
+        KLs = -0.5*logdetS -0.5*num_inducing + 0.5*logdetKmm + 0.5*np.sum(Kmmi[None,:,:]*S,1).sum(1) + 0.5*np.sum(q_u_mean*Kmmim,0)
         KL = KLs.sum()

         #gradient of the KL term (assuming zero mean function)
         dKL_dm = Kmmim.copy()
-        dKL_dS = 0.5*(Kmmi[:,:,None] - Si)
-        dKL_dKmm = 0.5*num_outputs*Kmmi - 0.5*Kmmi.dot(S.sum(-1)).dot(Kmmi) - 0.5*Kmmim.dot(Kmmim.T)
+        dKL_dS = 0.5*(Kmmi[None,:,:] - Si)
+        dKL_dKmm = 0.5*num_outputs*Kmmi - 0.5*Kmmi.dot(S.sum(0)).dot(Kmmi) - 0.5*Kmmim.dot(Kmmim.T)

         if mean_function is not None:
             #adjust KL term for mean function

@@ -80,17 +82,20 @@ class SVGP(LatentFunctionInference):
             dF_dthetaL = dF_dthetaL.sum(1).sum(1)*batch_scale

         #derivatives of expected likelihood, assuming zero mean function
-        Adv = A.T[:,:,None]*dF_dv[None,:,:] # As if dF_Dv is diagonal
-        Admu = A.T.dot(dF_dmu)
-        AdvA = np.dstack([np.dot(A.T, Adv[:,:,i].T) for i in range(num_outputs)])
-        #tmp = np.einsum('ijk,jlk->il', AdvA, S).dot(Kmmi)
-        tmp = linalg.ijk_jlk_to_il(AdvA, S).dot(Kmmi)
-        dF_dKmm = -Admu.dot(Kmmim.T) + AdvA.sum(-1) - tmp - tmp.T
+        Adv = A[None,:,:]*dF_dv.T[:,None,:] # As if dF_Dv is diagonal, D, M, N
+        Admu = A.dot(dF_dmu)
+        Adv = np.ascontiguousarray(Adv) # makes for faster operations later...(inc dsymm)
+        AdvA = np.dot(Adv.reshape(-1, num_data),A.T).reshape(num_outputs, num_inducing, num_inducing )
+        tmp = np.sum([np.dot(a,s) for a, s in zip(AdvA, S)],0).dot(Kmmi)
+        dF_dKmm = -Admu.dot(Kmmim.T) + AdvA.sum(0) - tmp - tmp.T
         dF_dKmm = 0.5*(dF_dKmm + dF_dKmm.T) # necessary? GPy bug?
-        #tmp = 2.*(np.einsum('ij,jlk->ilk', Kmmi,S) - np.eye(num_inducing)[:,:,None])
-        tmp = 2.*(linalg.ij_jlk_to_ilk(Kmmi, S) - np.eye(num_inducing)[:,:,None])
-        #dF_dKmn = np.einsum('ijk,jlk->il', tmp, Adv) + Kmmim.dot(dF_dmu.T)
-        dF_dKmn = linalg.ijk_jlk_to_il(tmp, Adv) + Kmmim.dot(dF_dmu.T)
+        tmp = S.reshape(-1, num_inducing).dot(Kmmi).reshape(num_outputs, num_inducing , num_inducing )
+        tmp = 2.*(tmp - np.eye(num_inducing)[None, :,:])
+        dF_dKmn = Kmmim.dot(dF_dmu.T)
+        for a,b in zip(tmp, Adv):
+            dF_dKmn += np.dot(a.T, b)

         dF_dm = Admu
         dF_dS = AdvA

@@ -106,11 +111,11 @@ class SVGP(LatentFunctionInference):
         log_marginal = F.sum() - KL

         dL_dm, dL_dS, dL_dKmm, dL_dKmn = dF_dm - dKL_dm, dF_dS- dKL_dS, dF_dKmm- dKL_dKmm, dF_dKmn
-        dL_dchol = np.dstack([2.*np.dot(dL_dS[:,:,i], L[:,:,i]) for i in range(num_outputs)])
+        dL_dchol = 2.*np.array([np.dot(a,b) for a, b in zip(dL_dS, L) ])
         dL_dchol = choleskies.triang_to_flat(dL_dchol)

         grad_dict = {'dL_dKmm':dL_dKmm, 'dL_dKmn':dL_dKmn, 'dL_dKdiag': dF_dv.sum(1), 'dL_dm':dL_dm, 'dL_dchol':dL_dchol, 'dL_dthetaL':dF_dthetaL}
         if mean_function is not None:
             grad_dict['dL_dmfZ'] = dF_dmfZ - dKL_dmfZ
             grad_dict['dL_dmfX'] = dF_dmfX

-        return Posterior(mean=q_u_mean, cov=S, K=Kmm, prior_mean=prior_mean_u), log_marginal, grad_dict
+        return Posterior(mean=q_u_mean, cov=S.T, K=Kmm, prior_mean=prior_mean_u), log_marginal, grad_dict
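For orientation, the marginal moments assembled above are the standard SVGP quantities (our notation; m and S are the mean and covariance of q(u), m_0 the mean function):

\mu_f = m_0(X) + K_{nm}K_{mm}^{-1}(m - m_0(Z)), \qquad
v_f = \operatorname{diag}\big(K_{nn} - K_{nm}K_{mm}^{-1}K_{mn} + K_{nm}K_{mm}^{-1} S K_{mm}^{-1} K_{mn}\big)

The rewrite computes A = K_{mm}^{-1}K_{mn} once from the jittered Cholesky factor (dpotrs) and accumulates the S-dependent part of the variance per output with dtrmm, replacing the earlier einsum-style helpers.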

View file

@@ -78,7 +78,7 @@ class MLP(Kern):
                 *((vec1[:, None]+vec2[None, :])*self.weight_variance
                 + 2*self.bias_variance + 2.))*base_cov_grad).sum()

-    def update_gradients_diag(self, X):
+    def update_gradients_diag(self, dL_dKdiag, X):
         self._K_diag_computations(X)
         self.variance.gradient = np.sum(self._K_diag_dvar*dL_dKdiag)

View file

@@ -15,7 +15,7 @@ from ...util.caching import Cache_this
 try:
     import stationary_cython
 except ImportError:
-    print('warning: failed to import cython module: falling back to numpy')
+    print('warning in stationary: failed to import cython module: falling back to numpy')
     config.set('cython', 'working', 'false')

File diff suppressed because it is too large.

View file

@@ -1,7 +1,9 @@
 #cython: boundscheck=False
+#cython: nonecheck=False
 #cython: wraparound=False

 import numpy as np
 cimport numpy as np
+from cython.parallel import prange

 ctypedef np.float64_t DTYPE_t

@@ -22,7 +24,18 @@ def grad_X(int N, int D, int M,
     cdef double *grad = <double*> _grad.data
     _grad_X(N, D, M, X, X2, tmp, grad) # return nothing, work in place.

+def grad_X_cython(int N, int D, int M, double[:,:] X, double[:,:] X2, double[:,:] tmp, double[:,:] grad):
+    cdef int n,d,nd,m
+    for nd in prange(N*D, nogil=True):
+        n = nd/D
+        d = nd%D
+        grad[n,d] = 0.0
+        for m in range(M):
+            grad[n,d] += tmp[n,m]*(X[n,d]-X2[m,d])
+
-def lengthscale_grads(int N, int M, int Q,
+def lengthscale_grads_in_c(int N, int M, int Q,
                       np.ndarray[DTYPE_t, ndim=2] _tmp,
                       np.ndarray[DTYPE_t, ndim=2] _X,
                       np.ndarray[DTYPE_t, ndim=2] _X2,

@@ -33,4 +46,14 @@ def lengthscale_grads(int N, int M, int Q,
     cdef double *grad = <double*> _grad.data
     _lengthscale_grads(N, M, Q, tmp, X, X2, grad) # return nothing, work in place.

+def lengthscale_grads(int N, int M, int Q, double[:,:] tmp, double[:,:] X, double[:,:] X2, double[:] grad):
+    cdef int q, n, m
+    cdef double gradq, dist
+    for q in range(Q):
+        grad[q] = 0.0
+        for n in range(N):
+            for m in range(M):
+                dist = X[n,q] - X2[m,q]
+                grad[q] += tmp[n,m]*dist*dist
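A pure-numpy sketch equivalent to the new lengthscale_grads kernel, useful for cross-checking (the function name is ours):

import numpy as np

def lengthscale_grads_numpy(tmp, X, X2):
    # grad[q] = sum_{n,m} tmp[n,m] * (X[n,q] - X2[m,q])**2
    diff = X[:, None, :] - X2[None, :, :]           # N x M x Q
    return np.einsum('nm,nmq->q', tmp, diff * diff)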

View file

@@ -1,19 +1,36 @@
 void _grad_X(int N, int D, int M, double* X, double* X2, double* tmp, double* grad){
-  int n,m,d;
   double retnd;
-  //#pragma omp parallel for private(n,d, retnd, m)
-  for(d=0;d<D;d++){
-    for(n=0;n<N;n++){
-      retnd = 0.0;
-      for(m=0;m<M;m++){
-        retnd += tmp[n*M+m]*(X[n*D+d]-X2[m*D+d]);
-      }
-      grad[n*D+d] = retnd;
-    }
+  int n,d,nd,m;
+  #pragma omp parallel for private(nd,n,d, retnd, m)
+  for(nd=0;nd<(D*N);nd++){
+    n = nd/D;
+    d = nd%D;
+    retnd = 0.0;
+    for(m=0;m<M;m++){
+      retnd += tmp[n*M+m]*(X[nd]-X2[m*D+d]);
+    }
+    grad[nd] = retnd;
   }
 } //grad_X

+void _lengthscale_grads_unsafe(int N, int M, int Q, double* tmp, double* X, double* X2, double* grad){
+  int n,m,nm,q,nQ,mQ;
+  double dist;
+  #pragma omp parallel for private(n,m,nm,q,nQ,mQ,dist)
+  for(nm=0; nm<(N*M); nm++){
+    n = nm/M;
+    m = nm%M;
+    nQ = n*Q;
+    mQ = m*Q;
+    for(q=0; q<Q; q++){
+      dist = X[nQ+q]-X2[mQ+q];
+      grad[q] += tmp[nm]*dist*dist;
+    }
+  }
+} //lengthscale_grads
+
 void _lengthscale_grads(int N, int M, int Q, double* tmp, double* X, double* X2, double* grad){
   int n,m,q;
   double gradq, dist;

@@ -33,3 +50,5 @@ for(q=0; q<Q; q++){

View file

@@ -143,7 +143,7 @@ class Likelihood(Parameterized):
         p_ystar, _ = zip(*[quad(integral_generator(yi, mi, vi, yi_m), -np.inf, np.inf)
                            for yi, mi, vi, yi_m in zipped_values])
-        p_ystar = np.array(p_ystar).reshape(-1, 1)
+        p_ystar = np.array(p_ystar).reshape(*y_test.shape)
         return np.log(p_ystar)

     def log_predictive_density_sampling(self, y_test, mu_star, var_star, Y_metadata=None, num_samples=1000):

@@ -173,6 +173,7 @@ class Likelihood(Parameterized):
         from scipy.misc import logsumexp
         log_p_ystar = -np.log(num_samples) + logsumexp(self.logpdf(fi_samples, y_test, Y_metadata=Y_metadata), axis=1)
+        log_p_ystar = np.array(log_p_ystar).reshape(*y_test.shape)
         return log_p_ystar

View file

@@ -5,11 +5,95 @@ import numpy as np
 from ..core.sparse_gp_mpi import SparseGP_MPI
 from .. import kern
+from ..core.parameterization import Param
 from ..likelihoods import Gaussian
-from ..core.parameterization.variational import SpikeAndSlabPrior, SpikeAndSlabPosterior
+from ..core.parameterization.variational import SpikeAndSlabPrior, SpikeAndSlabPosterior,VariationalPrior
 from ..inference.latent_function_inference.var_dtc_parallel import update_gradients, VarDTC_minibatch
 from ..kern._src.psi_comp.ssrbf_psi_gpucomp import PSICOMP_SSRBF_GPU

+class IBPPosterior(SpikeAndSlabPosterior):
+    '''
+    The SpikeAndSlab distribution for variational approximations.
+    '''
+    def __init__(self, means, variances, binary_prob, tau=None, sharedX=False, name='latent space'):
+        """
+        binary_prob : the probability of the distribution on the slab part.
+        """
+        from ..core.parameterization.transformations import Logexp
+        super(IBPPosterior, self).__init__(means, variances, binary_prob, group_spike=True, name=name)
+        self.sharedX = sharedX
+        if sharedX:
+            self.mean.fix(warning=False)
+            self.variance.fix(warning=False)
+        self.tau = Param("tau_", np.ones((self.gamma_group.shape[0],2)), Logexp())
+        self.link_parameter(self.tau)
+
+    def set_gradients(self, grad):
+        self.mean.gradient, self.variance.gradient, self.gamma.gradient, self.tau.gradient = grad
+
+    def __getitem__(self, s):
+        if isinstance(s, (int, slice, tuple, list, np.ndarray)):
+            import copy
+            n = self.__new__(self.__class__, self.name)
+            dc = self.__dict__.copy()
+            dc['mean'] = self.mean[s]
+            dc['variance'] = self.variance[s]
+            dc['binary_prob'] = self.binary_prob[s]
+            dc['tau'] = self.tau
+            dc['parameters'] = copy.copy(self.parameters)
+            n.__dict__.update(dc)
+            n.parameters[dc['mean']._parent_index_] = dc['mean']
+            n.parameters[dc['variance']._parent_index_] = dc['variance']
+            n.parameters[dc['binary_prob']._parent_index_] = dc['binary_prob']
+            n.parameters[dc['tau']._parent_index_] = dc['tau']
+            n._gradient_array_ = None
+            oversize = self.size - self.mean.size - self.variance.size - self.gamma.size - self.tau.size
+            n.size = n.mean.size + n.variance.size + n.gamma.size+ n.tau.size + oversize
+            n.ndim = n.mean.ndim
+            n.shape = n.mean.shape
+            n.num_data = n.mean.shape[0]
+            n.input_dim = n.mean.shape[1] if n.ndim != 1 else 1
+            return n
+        else:
+            return super(IBPPosterior, self).__getitem__(s)
+
+class IBPPrior(VariationalPrior):
+    def __init__(self, input_dim, alpha =2., name='IBPPrior', **kw):
+        super(IBPPrior, self).__init__(name=name, **kw)
+        from ..core.parameterization.transformations import Logexp, __fixed__
+        self.input_dim = input_dim
+        self.variance = 1.
+        self.alpha = Param('alpha', alpha, __fixed__)
+        self.link_parameter(self.alpha)
+
+    def KL_divergence(self, variational_posterior):
+        mu, S, gamma, tau = variational_posterior.mean.values, variational_posterior.variance.values, variational_posterior.gamma_group.values, variational_posterior.tau.values
+        var_mean = np.square(mu)/self.variance
+        var_S = (S/self.variance - np.log(S))
+        part1 = (gamma* (np.log(self.variance)-1. +var_mean + var_S)).sum()/2.
+
+        ad = self.alpha/self.input_dim
+        from scipy.special import betaln,digamma
+        part2 = (gamma*np.log(gamma)).sum() + ((1.-gamma)*np.log(1.-gamma)).sum() + betaln(ad,1.)*self.input_dim \
+            -betaln(tau[:,0], tau[:,1]).sum() + ((tau[:,0]-gamma-ad)*digamma(tau[:,0])).sum() + \
+            ((tau[:,1]+gamma-2.)*digamma(tau[:,1])).sum() + ((2.+ad-tau[:,0]-tau[:,1])*digamma(tau.sum(axis=1))).sum()
+        return part1+part2
+
+    def update_gradients_KL(self, variational_posterior):
+        mu, S, gamma, tau = variational_posterior.mean.values, variational_posterior.variance.values, variational_posterior.gamma_group.values, variational_posterior.tau.values
+        variational_posterior.mean.gradient -= gamma*mu/self.variance
+        variational_posterior.variance.gradient -= (1./self.variance - 1./S) * gamma /2.
+        from scipy.special import digamma,polygamma
+        dgamma = (np.log(gamma/(1.-gamma))+ digamma(tau[:,1])-digamma(tau[:,0]))/variational_posterior.num_data
+        variational_posterior.binary_prob.gradient -= dgamma+((np.square(mu)+S)/self.variance-np.log(S)+np.log(self.variance)-1.)/2.
+        ad = self.alpha/self.input_dim
+        common = (ad+2-tau[:,0]-tau[:,1])*polygamma(1,tau.sum(axis=1))
+        variational_posterior.tau.gradient[:,0] = -((tau[:,0]-gamma-ad)*polygamma(1,tau[:,0])+common)
+        variational_posterior.tau.gradient[:,1] = -((tau[:,1]+gamma-2)*polygamma(1,tau[:,1])+common)
+
 class SSGPLVM(SparseGP_MPI):
     """
     Spike-and-Slab Gaussian Process Latent Variable Model

@@ -23,9 +107,11 @@ class SSGPLVM(SparseGP_MPI):
     """
     def __init__(self, Y, input_dim, X=None, X_variance=None, Gamma=None, init='PCA', num_inducing=10,
-                 Z=None, kernel=None, inference_method=None, likelihood=None, name='Spike_and_Slab GPLVM', group_spike=False, mpi_comm=None, pi=None, learnPi=True,normalizer=False, **kwargs):
+                 Z=None, kernel=None, inference_method=None, likelihood=None, name='Spike_and_Slab GPLVM', group_spike=False, IBP=False, alpha=2., tau=None, mpi_comm=None, pi=None, learnPi=False,normalizer=False, sharedX=False, variational_prior=None,**kwargs):
         self.group_spike = group_spike
+        self.init = init
+        self.sharedX = sharedX

         if X == None:
             from ..util.initialization import initialize_latent

@@ -33,8 +119,6 @@ class SSGPLVM(SparseGP_MPI):
         else:
             fracs = np.ones(input_dim)

-        self.init = init
-
         if X_variance is None: # The variance of the variational approximation (S)
             X_variance = np.random.uniform(0,.1,X.shape)

@@ -64,17 +148,16 @@ class SSGPLVM(SparseGP_MPI):
         if pi is None:
             pi = np.empty((input_dim))
             pi[:] = 0.5
-        self.variational_prior = SpikeAndSlabPrior(pi=pi,learnPi=learnPi) # the prior probability of the latent binary variable b
-
-        X = SpikeAndSlabPosterior(X, X_variance, gamma)
+        if IBP:
+            self.variational_prior = IBPPrior(input_dim=input_dim, alpha=alpha) if variational_prior is None else variational_prior
+            X = IBPPosterior(X, X_variance, gamma, tau=tau,sharedX=sharedX)
+        else:
+            self.variational_prior = SpikeAndSlabPrior(pi=pi,learnPi=learnPi, group_spike=group_spike) if variational_prior is None else variational_prior
+            X = SpikeAndSlabPosterior(X, X_variance, gamma, group_spike=group_spike,sharedX=sharedX)

         super(SSGPLVM,self).__init__(X, Y, Z, kernel, likelihood, variational_prior=self.variational_prior, inference_method=inference_method, name=name, mpi_comm=mpi_comm, normalizer=normalizer, **kwargs)
-#         self.X.unfix()
-#         self.X.variance.constrain_positive()
         self.link_parameter(self.X, index=0)
-        if self.group_spike:
-            [self.X.gamma[:,i].tie('tieGamma'+str(i)) for i in range(self.X.gamma.shape[1])] # Tie columns together

     def set_X_gradients(self, X, X_grad):
         """Set the gradients of the posterior distribution of X in its specific form."""

@@ -84,9 +167,15 @@ class SSGPLVM(SparseGP_MPI):
         """Get the gradients of the posterior distribution of X in its specific form."""
         return X.mean.gradient, X.variance.gradient, X.binary_prob.gradient

+    def _propogate_X_val(self):
+        pass
+
     def parameters_changed(self):
+        self.X.propogate_val()
+        if self.sharedX: self._highest_parent_._propogate_X_val()
         super(SSGPLVM,self).parameters_changed()
         if isinstance(self.inference_method, VarDTC_minibatch):
+            self.X.collate_gradient()
             return

         self._log_marginal_likelihood -= self.variational_prior.KL_divergence(self.X)

@@ -95,6 +184,7 @@ class SSGPLVM(SparseGP_MPI):
         # update for the KL divergence
         self.variational_prior.update_gradients_KL(self.X)
+        self.X.collate_gradient()

     def input_sensitivity(self):
         if self.kern.ARD:
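A hypothetical construction using the new options (argument names from the signature above; data is synthetic):

import numpy as np
from GPy.models import SSGPLVM

Y = np.random.randn(50, 12)
m = SSGPLVM(Y, input_dim=5, num_inducing=10,
            group_spike=True,    # one gamma per latent dimension, shared over data points
            IBP=True, alpha=2.)  # put the IBP prior over latent dimensions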

View file

@@ -2,33 +2,256 @@
 The Manifold Relevance Determination model with the spike-and-slab prior
 """
+import numpy as np
 from ..core import Model
 from .ss_gplvm import SSGPLVM
+from ..core.parameterization.variational import SpikeAndSlabPrior,NormalPosterior,VariationalPrior
+from ..util.misc import param_to_array
+from ..kern import RBF
+from ..core import Param
+from numpy.linalg.linalg import LinAlgError

 class SSMRD(Model):
-    def __init__(self, Ylist, input_dim, X=None, X_variance=None,
-                 initx = 'PCA', initz = 'permute',
-                 num_inducing=10, Z=None, kernel=None,
-                 inference_method=None, likelihoods=None, name='ss_mrd', Ynames=None):
+    def __init__(self, Ylist, input_dim, X=None, X_variance=None, Gammas=None, initx = 'PCA_concat', initz = 'permute',
+                 num_inducing=10, Zs=None, kernels=None, inference_methods=None, likelihoods=None, group_spike=True,
+                 pi=0.5, name='ss_mrd', Ynames=None, mpi_comm=None, IBP=False, alpha=2., taus=None, ):
         super(SSMRD, self).__init__(name)
+        self.mpi_comm = mpi_comm
+        self._PROPAGATE_ = False

-        self.updates = False
-        self.models = [SSGPLVM(y, input_dim, X=X, X_variance=X_variance, num_inducing=num_inducing,Z=Z,init=initx,
-                               kernel=kernel.copy() if kernel else None,inference_method=inference_method,likelihood=likelihoods,
-                               name='model_'+str(i)) for i,y in enumerate(Ylist)]
-        self.add_parameters(*(self.models))
-        [[[self.models[m].X.mean[i,j:j+1].tie('mean_'+str(i)+'_'+str(j)) for m in range(len(self.models))] for j in range(self.models[0].X.mean.shape[1])]
-            for i in range(self.models[0].X.mean.shape[0])]
-        [[[self.models[m].X.variance[i,j:j+1].tie('var_'+str(i)+'_'+str(j)) for m in range(len(self.models))] for j in range(self.models[0].X.variance.shape[1])]
-            for i in range(self.models[0].X.variance.shape[0])]
-        self.updates = True
+        # initialize X for individual models
+        X, X_variance, Gammas, fracs = self._init_X(Ylist, input_dim, X, X_variance, Gammas, initx)
+        self.X = NormalPosterior(means=X, variances=X_variance)
+
+        if kernels is None:
+            kernels = [RBF(input_dim, lengthscale=1./fracs, ARD=True) for i in xrange(len(Ylist))]
+        if Zs is None:
+            Zs = [None]* len(Ylist)
+        if likelihoods is None:
+            likelihoods = [None]* len(Ylist)
+        if inference_methods is None:
+            inference_methods = [None]* len(Ylist)
+
+        if IBP:
+            self.var_priors = [IBPPrior_SSMRD(len(Ylist),input_dim,alpha=alpha) for i in xrange(len(Ylist))]
+        else:
+            self.var_priors = [SpikeAndSlabPrior_SSMRD(nModels=len(Ylist),pi=pi,learnPi=False, group_spike=group_spike) for i in xrange(len(Ylist))]
+        self.models = [SSGPLVM(y, input_dim, X=X.copy(), X_variance=X_variance.copy(), Gamma=Gammas[i], num_inducing=num_inducing,Z=Zs[i], learnPi=False, group_spike=group_spike,
+                               kernel=kernels[i],inference_method=inference_methods[i],likelihood=likelihoods[i], variational_prior=self.var_priors[i], IBP=IBP, tau=None if taus is None else taus[i],
+                               name='model_'+str(i), mpi_comm=mpi_comm, sharedX=True) for i,y in enumerate(Ylist)]
+        self.link_parameters(*(self.models+[self.X]))
+
+    def _propogate_X_val(self):
+        if self._PROPAGATE_: return
+        for m in self.models:
+            m.X.mean.values[:] = self.X.mean.values
+            m.X.variance.values[:] = self.X.variance.values
+        varp_list = [m.X for m in self.models]
+        [vp._update_inernal(varp_list) for vp in self.var_priors]
+        self._PROPAGATE_=True
+
+    def _collate_X_gradient(self):
+        self._PROPAGATE_ = False
+        self.X.mean.gradient[:] = 0
+        self.X.variance.gradient[:] = 0
+        for m in self.models:
+            self.X.mean.gradient += m.X.mean.gradient
+            self.X.variance.gradient += m.X.variance.gradient

     def parameters_changed(self):
         super(SSMRD, self).parameters_changed()
+        [m.parameters_changed() for m in self.models]
         self._log_marginal_likelihood = sum([m._log_marginal_likelihood for m in self.models])
+        self._collate_X_gradient()

     def log_likelihood(self):
         return self._log_marginal_likelihood

+    def _init_X(self, Ylist, input_dim, X=None, X_variance=None, Gammas=None, initx='PCA_concat'):
+        # Divide latent dimensions
+        idx = np.empty((input_dim,),dtype=np.int)
+        residue = (input_dim)%(len(Ylist))
+        for i in xrange(len(Ylist)):
+            if i < residue:
+                size = input_dim/len(Ylist)+1
+                idx[i*size:(i+1)*size] = i
+            else:
+                size = input_dim/len(Ylist)
+                idx[i*size+residue:(i+1)*size+residue] = i
+
+        if X is None:
+            if initx == 'PCA_concat':
+                X = np.empty((Ylist[0].shape[0],input_dim))
+                fracs = np.empty((input_dim,))
+                from ..util.initialization import initialize_latent
+                for i in xrange(len(Ylist)):
+                    Y = Ylist[i]
+                    dim = (idx==i).sum()
+                    if dim>0:
+                        x, fr = initialize_latent('PCA', dim, Y)
+                        X[:,idx==i] = x
+                        fracs[idx==i] = fr
+            elif initx=='PCA_joint':
+                y = np.hstack(Ylist)
+                from ..util.initialization import initialize_latent
+                X, fracs = initialize_latent('PCA', input_dim, y)
+            else:
+                X = np.random.randn(Ylist[0].shape[0], input_dim)
+                fracs = np.ones(input_dim)
+        else:
+            fracs = np.ones(input_dim)
+
+        if X_variance is None: # The variance of the variational approximation (S)
+            X_variance = np.random.uniform(0,.1,X.shape)
+
+        if Gammas is None:
+            Gammas = []
+            for x in X:
+                gamma = np.empty_like(X) # The posterior probabilities of the binary variable in the variational approximation
+                gamma[:] = 0.5 + 0.1 * np.random.randn(X.shape[0], input_dim)
+                gamma[gamma>1.-1e-9] = 1.-1e-9
+                gamma[gamma<1e-9] = 1e-9
+                Gammas.append(gamma)
+        return X, X_variance, Gammas, fracs
+
+    @Model.optimizer_array.setter
+    def optimizer_array(self, p):
+        if self.mpi_comm != None:
+            if self._IN_OPTIMIZATION_ and self.mpi_comm.rank==0:
+                self.mpi_comm.Bcast(np.int32(1),root=0)
+                self.mpi_comm.Bcast(p, root=0)
+        Model.optimizer_array.fset(self,p)
+
+    def optimize(self, optimizer=None, start=None, **kwargs):
+        self._IN_OPTIMIZATION_ = True
+        if self.mpi_comm==None:
+            super(SSMRD, self).optimize(optimizer,start,**kwargs)
+        elif self.mpi_comm.rank==0:
+            super(SSMRD, self).optimize(optimizer,start,**kwargs)
+            self.mpi_comm.Bcast(np.int32(-1),root=0)
+        elif self.mpi_comm.rank>0:
+            x = self.optimizer_array.copy()
+            flag = np.empty(1,dtype=np.int32)
+            while True:
+                self.mpi_comm.Bcast(flag,root=0)
+                if flag==1:
+                    try:
+                        self.optimizer_array = x
+                        self._fail_count = 0
+                    except (LinAlgError, ZeroDivisionError, ValueError):
+                        if self._fail_count >= self._allowed_failures:
+                            raise
+                        self._fail_count += 1
+                elif flag==-1:
+                    break
+                else:
+                    self._IN_OPTIMIZATION_ = False
+                    raise Exception("Unrecognizable flag for synchronization!")
+        self._IN_OPTIMIZATION_ = False
+
+class SpikeAndSlabPrior_SSMRD(SpikeAndSlabPrior):
+    def __init__(self, nModels, pi=0.5, learnPi=False, group_spike=True, variance = 1.0, name='SSMRDPrior', **kw):
+        self.nModels = nModels
+        self._b_prob_all = 0.5
+        super(SpikeAndSlabPrior_SSMRD, self).__init__(pi=pi,learnPi=learnPi,group_spike=group_spike,variance=variance, name=name, **kw)
+
+    def _update_inernal(self, varp_list):
+        """Make an update of the internal status by gathering the variational posteriors for all the individual models."""
+        # The probability for the binary variable for the same latent dimension of any of the models is on.
+        if self.group_spike:
+            self._b_prob_all = 1.-param_to_array(varp_list[0].gamma_group)
+            [np.multiply(self._b_prob_all, 1.-vp.gamma_group, self._b_prob_all) for vp in varp_list[1:]]
+        else:
+            self._b_prob_all = 1.-param_to_array(varp_list[0].binary_prob)
+            [np.multiply(self._b_prob_all, 1.-vp.binary_prob, self._b_prob_all) for vp in varp_list[1:]]
+
+    def KL_divergence(self, variational_posterior):
+        mu = variational_posterior.mean
+        S = variational_posterior.variance
+        if self.group_spike:
+            gamma = variational_posterior.binary_prob[0]
+        else:
+            gamma = variational_posterior.binary_prob
+        if len(self.pi.shape)==2:
+            idx = np.unique(gamma._raveled_index()/gamma.shape[-1])
+            pi = self.pi[idx]
+        else:
+            pi = self.pi
+
+        var_mean = np.square(mu)/self.variance
+        var_S = (S/self.variance - np.log(S))
+        var_gamma = (gamma*np.log(gamma/pi)).sum()+((1-gamma)*np.log((1-gamma)/(1-pi))).sum()
+        return var_gamma +((1.-self._b_prob_all)*(np.log(self.variance)-1. +var_mean + var_S)).sum()/(2.*self.nModels)
+
+    def update_gradients_KL(self, variational_posterior):
+        mu = variational_posterior.mean
+        S = variational_posterior.variance
+        N = variational_posterior.num_data
+        if self.group_spike:
+            gamma = variational_posterior.binary_prob.values[0]
+        else:
+            gamma = variational_posterior.binary_prob.values
+        if len(self.pi.shape)==2:
+            idx = np.unique(gamma._raveled_index()/gamma.shape[-1])
+            pi = self.pi[idx]
+        else:
+            pi = self.pi
+        if self.group_spike:
+            tmp = self._b_prob_all/(1.-gamma)
+            variational_posterior.binary_prob.gradient -= np.log((1-pi)/pi*gamma/(1.-gamma))/N +tmp*((np.square(mu)+S)/self.variance-np.log(S)+np.log(self.variance)-1.)/2.
+        else:
+            variational_posterior.binary_prob.gradient -= np.log((1-pi)/pi*gamma/(1.-gamma))+((np.square(mu)+S)/self.variance-np.log(S)+np.log(self.variance)-1.)/2.
+        mu.gradient -= (1.-self._b_prob_all)*mu/(self.variance*self.nModels)
+        S.gradient -= (1./self.variance - 1./S) * (1.-self._b_prob_all) /(2.*self.nModels)
+        if self.learnPi:
+            raise 'Not Supported!'
+
+class IBPPrior_SSMRD(VariationalPrior):
+    def __init__(self, nModels, input_dim, alpha =2., tau=None, name='IBPPrior', **kw):
+        super(IBPPrior_SSMRD, self).__init__(name=name, **kw)
+        from ..core.parameterization.transformations import Logexp, __fixed__
+        self.nModels = nModels
+        self._b_prob_all = 0.5
+        self.input_dim = input_dim
+        self.variance = 1.
+        self.alpha = Param('alpha', alpha, __fixed__)
+        self.link_parameter(self.alpha)
+
+    def _update_inernal(self, varp_list):
+        """Make an update of the internal status by gathering the variational posteriors for all the individual models."""
+        # The probability for the binary variable for the same latent dimension of any of the models is on.
+        self._b_prob_all = 1.-param_to_array(varp_list[0].gamma_group)
+        [np.multiply(self._b_prob_all, 1.-vp.gamma_group, self._b_prob_all) for vp in varp_list[1:]]
+
+    def KL_divergence(self, variational_posterior):
+        mu, S, gamma, tau = variational_posterior.mean.values, variational_posterior.variance.values, variational_posterior.gamma_group.values, variational_posterior.tau.values
+        var_mean = np.square(mu)/self.variance
+        var_S = (S/self.variance - np.log(S))
+        part1 = ((1.-self._b_prob_all)* (np.log(self.variance)-1. +var_mean + var_S)).sum()/(2.*self.nModels)
+
+        ad = self.alpha/self.input_dim
+        from scipy.special import betaln,digamma
+        part2 = (gamma*np.log(gamma)).sum() + ((1.-gamma)*np.log(1.-gamma)).sum() + (betaln(ad,1.)*self.input_dim -betaln(tau[:,0], tau[:,1]).sum())/self.nModels \
+            + (( (tau[:,0]-ad)/self.nModels -gamma)*digamma(tau[:,0])).sum() + \
+            (((tau[:,1]-1.)/self.nModels+gamma-1.)*digamma(tau[:,1])).sum() + (((1.+ad-tau[:,0]-tau[:,1])/self.nModels+1.)*digamma(tau.sum(axis=1))).sum()
+        return part1+part2
+
+    def update_gradients_KL(self, variational_posterior):
+        mu, S, gamma, tau = variational_posterior.mean.values, variational_posterior.variance.values, variational_posterior.gamma_group.values, variational_posterior.tau.values
+        variational_posterior.mean.gradient -= (1.-self._b_prob_all)*mu/(self.variance*self.nModels)
+        variational_posterior.variance.gradient -= (1./self.variance - 1./S) * (1.-self._b_prob_all) /(2.*self.nModels)
+        from scipy.special import digamma,polygamma
+        tmp = self._b_prob_all/(1.-gamma)
+        dgamma = (np.log(gamma/(1.-gamma))+ digamma(tau[:,1])-digamma(tau[:,0]))/variational_posterior.num_data
+        variational_posterior.binary_prob.gradient -= dgamma+tmp*((np.square(mu)+S)/self.variance-np.log(S)+np.log(self.variance)-1.)/2.
+        ad = self.alpha/self.input_dim
+        common = ((1.+ad-tau[:,0]-tau[:,1])/self.nModels+1.)*polygamma(1,tau.sum(axis=1))
+        variational_posterior.tau.gradient[:,0] = -(((tau[:,0]-ad)/self.nModels -gamma)*polygamma(1,tau[:,0])+common)
+        variational_posterior.tau.gradient[:,1] = -(((tau[:,1]-1.)/self.nModels+gamma-1.)*polygamma(1,tau[:,1])+common)
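A hypothetical two-view construction (constructor arguments as in the diff; data is synthetic):

import numpy as np
from GPy.models.ss_mrd import SSMRD

Y1 = np.random.randn(30, 4)
Y2 = np.random.randn(30, 6)
m = SSMRD([Y1, Y2], input_dim=5, num_inducing=10, initx='PCA_concat')
m.optimize(max_iters=100)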

View file

@@ -9,8 +9,8 @@ These tests make sure that the pure python and cython codes work the same
 class CythonTestChols(np.testing.TestCase):
     def setUp(self):
-        self.flat = np.random.randn(45, 5)
-        self.triang = np.dstack([np.eye(20)[:,:,None] for i in range(3)])
+        self.flat = np.random.randn(45,5)
+        self.triang = np.array([np.eye(20) for i in range(3)])

     def test_flat_to_triang(self):
         L1 = choleskies._flat_to_triang_pure(self.flat)
         L2 = choleskies._flat_to_triang_cython(self.flat)

View file

@@ -17,12 +17,12 @@ def safe_root(N):
 def _flat_to_triang_pure(flat_mat):
     N, D = flat_mat.shape
     M = (-1 + safe_root(8*N+1))//2
-    ret = np.zeros((M, M, D))
-    count = 0
-    for m in range(M):
-        for mm in range(m+1):
-            for d in range(D):
-                ret.flat[d + m*D*M + mm*D] = flat_mat.flat[count];
-                count = count+1
+    ret = np.zeros((D, M, M))
+    for d in range(D):
+        count = 0
+        for m in range(M):
+            for mm in range(m+1):
+                ret[d,m, mm] = flat_mat[count, d];
+                count = count+1
     return ret

@@ -33,15 +33,15 @@ def _flat_to_triang_cython(flat_mat):
 def _triang_to_flat_pure(L):
-    M, _, D = L.shape
+    D, _, M = L.shape

     N = M*(M+1)//2
     flat = np.empty((N, D))
-    count = 0;
-    for m in range(M):
-        for mm in range(m+1):
-            for d in range(D):
-                flat.flat[count] = L.flat[d + m*D*M + mm*D];
-                count = count +1
+    for d in range(D):
+        count = 0;
+        for m in range(M):
+            for mm in range(m+1):
+                flat[count,d] = L[d, m, mm]
+                count = count +1
     return flat

@@ -74,7 +74,7 @@ def triang_to_cov(L):
     return np.dstack([np.dot(L[:,:,i], L[:,:,i].T) for i in range(L.shape[-1])])

 def multiple_dpotri(Ls):
-    return np.dstack([linalg.dpotri(np.asfortranarray(Ls[:,:,i]), lower=1)[0] for i in range(Ls.shape[-1])])
+    return np.array([linalg.dpotri(np.asfortranarray(Ls[i]), lower=1)[0] for i in range(Ls.shape[0])])

 def indexes_to_fix_for_low_rank(rank, size):
     """

File diff suppressed because it is too large.

View file

@@ -8,28 +8,28 @@ import numpy as np
 cimport numpy as np

 def flat_to_triang(np.ndarray[double, ndim=2] flat, int M):
-    """take a matrix N x D and return a M X M x D array where
+    """take a matrix N x D and return a D X M x M array where
     N = M(M+1)/2
     the lower triangular portion of the d'th slice of the result is filled by the d'th column of flat.
     """
-    cdef int N = flat.shape[0]
     cdef int D = flat.shape[1]
+    cdef int N = flat.shape[0]
     cdef int count = 0
-    cdef np.ndarray[double, ndim=3] ret = np.zeros((M, M, D))
+    cdef np.ndarray[double, ndim=3] ret = np.zeros((D, M, M))
     cdef int d, m, mm
     for d in range(D):
         count = 0
         for m in range(M):
             for mm in range(m+1):
-                ret[m, mm, d] = flat[count,d]
+                ret[d, m, mm] = flat[count,d]
                 count += 1
     return ret

 def triang_to_flat(np.ndarray[double, ndim=3] L):
-    cdef int M = L.shape[0]
-    cdef int D = L.shape[2]
+    cdef int D = L.shape[0]
+    cdef int M = L.shape[1]
     cdef int N = M*(M+1)/2
     cdef int count = 0
     cdef np.ndarray[double, ndim=2] flat = np.empty((N, D))

@@ -38,7 +38,7 @@ def triang_to_flat(np.ndarray[double, ndim=3] L):
         count = 0
         for m in range(M):
             for mm in range(m+1):
-                flat[count,d] = L[m, mm, d]
+                flat[count,d] = L[d, m, mm]
                 count += 1
     return flat
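A round-trip sanity check of the new (D, M, M) layout via the pure-python helpers exercised by the tests above (a sketch; shapes are illustrative):

import numpy as np
from GPy.util import choleskies

D, M = 3, 4
L = np.array([np.tril(np.random.randn(M, M)) for _ in range(D)])  # D x M x M
flat = choleskies._triang_to_flat_pure(L)                         # N x D with N = M(M+1)/2
assert flat.shape == (M * (M + 1) // 2, D)
assert np.allclose(choleskies._flat_to_triang_pure(flat), L)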