[merge] for spgp minibatch and psi NxMxM

This commit is contained in:
Max Zwiessele 2015-09-04 10:37:29 +01:00
commit 9ddec5bc70
11 changed files with 324 additions and 257 deletions

View file

@@ -64,9 +64,7 @@ class VarDTC(LatentFunctionInference):
     def get_VVTfactor(self, Y, prec):
         return Y * prec # TODO chache this, and make it effective

-    def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None, Lm=None, dL_dKmm=None):
+    def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None, Lm=None, dL_dKmm=None, psi0=None, psi1=None, psi2=None):

         _, output_dim = Y.shape
         uncertain_inputs = isinstance(X, VariationalPosterior)
@@ -95,17 +93,28 @@ class VarDTC(LatentFunctionInference):
         # The rather complex computations of A, and the psi stats
         if uncertain_inputs:
-            psi0 = kern.psi0(Z, X)
-            psi1 = kern.psi1(Z, X)
+            if psi0 is None:
+                psi0 = kern.psi0(Z, X)
+            if psi1 is None:
+                psi1 = kern.psi1(Z, X)
             if het_noise:
-                psi2_beta = np.sum([kern.psi2(Z,X[i:i+1,:]) * beta_i for i,beta_i in enumerate(beta)],0)
+                if psi2 is None:
+                    assert len(psi2.shape) == 3 # Need to have not summed out N
+                    #FIXME: Need testing
+                    psi2_beta = np.sum([psi2[X[i:i+1,:], :, :] * beta_i for i,beta_i in enumerate(beta)],0)
+                else:
+                    psi2_beta = np.sum([kern.psi2(Z,X[i:i+1,:]) * beta_i for i,beta_i in enumerate(beta)],0)
             else:
-                psi2_beta = kern.psi2(Z,X) * beta
+                if psi2 is None:
+                    psi2 = kern.psi2(Z,X)
+                psi2_beta = psi2 * beta
             LmInv = dtrtri(Lm)
             A = LmInv.dot(psi2_beta.dot(LmInv.T))
         else:
-            psi0 = kern.Kdiag(X)
-            psi1 = kern.K(X, Z)
+            if psi0 is None:
+                psi0 = kern.Kdiag(X)
+            if psi1 is None:
+                psi1 = kern.K(X, Z)
             if het_noise:
                 tmp = psi1 * (np.sqrt(beta))
             else:
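Note: with psi2 available per data point (NxMxM), the het_noise weighting above is just a precision-weighted sum over N. A minimal NumPy sketch of that contraction, with toy shapes that are not from the commit:

```python
import numpy as np

N, M = 4, 3
beta = np.random.rand(N)          # per-point noise precision
psi2_n = np.random.rand(N, M, M)  # psi2 kept as NxMxM (not summed over N)

# one einsum replaces the per-point loop in the diff
psi2_beta = np.einsum('n,nmo->mo', beta, psi2_n)
assert np.allclose(psi2_beta,
                   np.sum([psi2_n[i] * beta[i] for i in range(N)], 0))
```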

View file

@@ -5,7 +5,7 @@ class StochasticStorage(object):
    '''
    This is a container for holding the stochastic parameters,
    such as subset indices or step length and so on.
    self.d has to be a list of lists:
    [dimension indices, nan indices for those dimensions]
    so that the minibatches can be used as efficiently as possible.
@@ -38,16 +38,17 @@ class SparseGPMissing(StochasticStorage):
        import numpy as np
        self.Y = model.Y_normalized
        bdict = {}
+        #For N > 1000 array2string default crops
+        opt = np.get_printoptions()
+        np.set_printoptions(threshold='nan')
        for d in range(self.Y.shape[1]):
-            inan = np.isnan(self.Y[:, d])
-            arr_str = np.array2string(inan,
-                                      np.inf, 0,
-                                      True, '',
-                                      formatter={'bool':lambda x: '1' if x else '0'})
+            inan = np.isnan(self.Y)[:, d]
+            arr_str = np.array2string(inan, np.inf, 0, True, '', formatter={'bool':lambda x: '1' if x else '0'})
            try:
                bdict[arr_str][0].append(d)
            except:
                bdict[arr_str] = [[d], ~inan]
+        np.set_printoptions(**opt)
        self.d = bdict.values()

class SparseGPStochastics(StochasticStorage):
@@ -55,32 +56,36 @@ class SparseGPStochastics(StochasticStorage):
    For the sparse gp we need to store the dimension we are in,
    and the indices corresponding to those
    """
-    def __init__(self, model, batchsize=1):
+    def __init__(self, model, batchsize=1, missing_data=True):
        self.batchsize = batchsize
        self.output_dim = model.Y.shape[1]
        self.Y = model.Y_normalized
+        self.missing_data = missing_data
        self.reset()
        self.do_stochastics()

    def do_stochastics(self):
+        import numpy as np
        if self.batchsize == 1:
            self.current_dim = (self.current_dim+1)%self.output_dim
-            self.d = [[[self.current_dim], np.isnan(self.Y[:, self.d])]]
+            self.d = [[[self.current_dim], np.isnan(self.Y[:, self.current_dim]) if self.missing_data else None]]
        else:
-            import numpy as np
            self.d = np.random.choice(self.output_dim, size=self.batchsize, replace=False)
            bdict = {}
-            for d in self.d:
-                inan = np.isnan(self.Y[:, d])
-                arr_str = int(np.array2string(inan,
-                                              np.inf, 0,
-                                              True, '',
-                                              formatter={'bool':lambda x: '1' if x else '0'}), 2)
+            if self.missing_data:
+                opt = np.get_printoptions()
+                np.set_printoptions(threshold='nan')
+                for d in self.d:
+                    inan = np.isnan(self.Y[:, d])
+                    arr_str = np.array2string(inan, np.inf, 0, True, '', formatter={'bool':lambda x: '1' if x else '0'})
                    try:
                        bdict[arr_str][0].append(d)
                    except:
                        bdict[arr_str] = [[d], ~inan]
-            self.d = bdict.values()
+                np.set_printoptions(**opt)
+                self.d = bdict.values()
+            else:
+                self.d = [[self.d, None]]

    def reset(self):
        self.current_dim = -1
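Note: the hunks above build dictionary keys with np.array2string and temporarily raise the print threshold, since array2string summarises long arrays and would otherwise collide keys for columns with N > 1000. A sketch of the same grouping idea using a bytes key instead (toy data, not the commit's code):

```python
import numpy as np

# group output dimensions sharing the same NaN pattern, so each group
# needs only one inference pass over its observed rows
Y = np.array([[1.,     np.nan, 2.],
              [np.nan, np.nan, 3.],
              [4.,     5.,     6.]])
bdict = {}
for d in range(Y.shape[1]):
    inan = np.isnan(Y[:, d])
    key = inan.tobytes()          # hashable key for the NaN pattern
    if key in bdict:
        bdict[key][0].append(d)
    else:
        bdict[key] = [[d], ~inan]
groups = list(bdict.values())     # [[dims sharing a pattern], observed-row mask]
```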

View file

@@ -58,24 +58,10 @@ class Kern(Parameterized):
        self._sliced_X = 0
        self.useGPU = self._support_GPU and useGPU
-        self._return_psi2_n_flag = ObsAr(np.zeros(1)).astype(bool)

        from .psi_comp import PSICOMP_GH
        self.psicomp = PSICOMP_GH()

-    @property
-    def return_psi2_n(self):
-        """
-        Flag whether to pass back psi2 as NxMxM or MxM, by summing out N.
-        """
-        return self._return_psi2_n_flag[0]
-
-    @return_psi2_n.setter
-    def return_psi2_n(self, val):
-        def visit(self):
-            if isinstance(self, Kern):
-                self._return_psi2_n_flag[0]=val
-        self.traverse(visit)
-
    @Cache_this(limit=20)
    def _slice_X(self, X):
        return X[:, self.active_dims]
@@ -97,7 +83,9 @@ class Kern(Parameterized):
    def psi1(self, Z, variational_posterior):
        return self.psicomp.psicomputations(self, Z, variational_posterior)[1]

    def psi2(self, Z, variational_posterior):
-        return self.psicomp.psicomputations(self, Z, variational_posterior)[2]
+        return self.psicomp.psicomputations(self, Z, variational_posterior, return_psi2_n=False)[2]
+
+    def psi2n(self, Z, variational_posterior):
+        return self.psicomp.psicomputations(self, Z, variational_posterior, return_psi2_n=True)[2]

    def gradients_X(self, dL_dK, X, X2):
        raise NotImplementedError

    def gradients_XX(self, dL_dK, X, X2):
@@ -115,7 +103,8 @@ class Kern(Parameterized):
        """Set the gradients of all parameters when doing full (N) inference."""
        raise NotImplementedError

-    def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
+    def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior,
+                                      psi0=None, psi1=None, psi2=None):
        """
        Set the gradients of all parameters when doing inference with
        uncertain inputs, using expectations of the kernel.
@@ -126,22 +115,27 @@ class Kern(Parameterized):
           dL_dpsi1 * dpsi1_d{theta_i} +
           dL_dpsi2 * dpsi2_d{theta_i}
        """
-        dtheta = self.psicomp.psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior)[0]
+        dtheta = self.psicomp.psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior,
+                                                        psi0=psi0, psi1=psi1, psi2=psi2)[0]
        self.gradient[:] = dtheta

-    def gradients_Z_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
+    def gradients_Z_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior,
+                                 psi0=None, psi1=None, psi2=None):
        """
        Returns the derivative of the objective wrt Z, using the chain rule
        through the expectation variables.
        """
-        return self.psicomp.psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior)[1]
+        return self.psicomp.psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior,
+                                                      psi0=psi0, psi1=psi1, psi2=psi2)[1]

-    def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
+    def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior,
+                                  psi0=None, psi1=None, psi2=None, Lpsi0=None, Lpsi1=None, Lpsi2=None):
        """
        Compute the gradients wrt the parameters of the variational
        distruibution q(X), chain-ruling via the expectations of the kernel
        """
-        return self.psicomp.psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior)[2:]
+        return self.psicomp.psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior,
+                                                      psi0=psi0, psi1=psi1, psi2=psi2)[2:]

    def plot(self, x=None, fignum=None, ax=None, title=None, plot_limits=None, resolution=None, **mpl_kwargs):
        """

View file

@@ -137,25 +137,31 @@ def _slice_psi(f):

def _slice_update_gradients_expectations(f):
    @wraps(f)
-    def wrap(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
+    def wrap(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior,
+             psi0=None, psi1=None, psi2=None, Lpsi0=None, Lpsi1=None, Lpsi2=None):
        with _Slice_wrap(self, Z, variational_posterior) as s:
-            ret = f(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, s.X, s.X2)
+            ret = f(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, s.X, s.X2,
+                    psi0=psi0, psi1=psi1, psi2=psi2, Lpsi0=Lpsi0, Lpsi1=Lpsi1, Lpsi2=Lpsi2)
        return ret
    return wrap

def _slice_gradients_Z_expectations(f):
    @wraps(f)
-    def wrap(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
+    def wrap(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior,
+             psi0=None, psi1=None, psi2=None, Lpsi0=None, Lpsi1=None, Lpsi2=None):
        with _Slice_wrap(self, Z, variational_posterior) as s:
-            ret = s.handle_return_array(f(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, s.X, s.X2))
+            ret = s.handle_return_array(f(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, s.X, s.X2,
+                                          psi0=psi0, psi1=psi1, psi2=psi2, Lpsi0=Lpsi0, Lpsi1=Lpsi1, Lpsi2=Lpsi2))
        return ret
    return wrap

def _slice_gradients_qX_expectations(f):
    @wraps(f)
-    def wrap(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
+    def wrap(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior,
+             psi0=None, psi1=None, psi2=None, Lpsi0=None, Lpsi1=None, Lpsi2=None):
        with _Slice_wrap(self, variational_posterior, Z) as s:
-            ret = list(f(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, s.X2, s.X))
+            ret = list(f(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, s.X2, s.X,
+                         psi0=psi0, psi1=psi1, psi2=psi2, Lpsi0=Lpsi0, Lpsi1=Lpsi1, Lpsi2=Lpsi2))
            r2 = ret[:2]
            ret[0] = s.handle_return_array(r2[0])
            ret[1] = s.handle_return_array(r2[1])
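Note: the wrapper change only threads the optional statistics through to the wrapped routine instead of dropping them. A minimal sketch of the forwarding pattern, with the slicing logic elided:

```python
from functools import wraps

def forward_psi_kwargs(f):
    # sketch: forward the optional precomputed statistics unchanged;
    # the real decorators additionally slice Z / variational_posterior
    @wraps(f)
    def wrap(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior,
             psi0=None, psi1=None, psi2=None, Lpsi0=None, Lpsi1=None, Lpsi2=None):
        return f(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior,
                 psi0=psi0, psi1=psi1, psi2=psi2,
                 Lpsi0=Lpsi0, Lpsi1=Lpsi1, Lpsi2=Lpsi2)
    return wrap
```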

View file

@@ -12,18 +12,22 @@ from .gaussherm import PSICOMP_GH
class PSICOMP_RBF(Pickleable):

    @Cache_this(limit=10, ignore_args=(0,))
-    def psicomputations(self, variance, lengthscale, Z, variational_posterior):
+    def psicomputations(self, variance, lengthscale, Z, variational_posterior, return_psi2_n=False):
        if isinstance(variational_posterior, variational.NormalPosterior):
-            return rbf_psi_comp.psicomputations(variance, lengthscale, Z, variational_posterior)
+            return rbf_psi_comp.psicomputations(variance, lengthscale, Z, variational_posterior, return_psi2_n=return_psi2_n)
        elif isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
+            if return_psi2_n:
+                raise NotImplementedError('However this function seems to return it by default')
            return ssrbf_psi_comp.psicomputations(variance, lengthscale, Z, variational_posterior)
        else:
            raise ValueError("unknown distriubtion received for psi-statistics")

    @Cache_this(limit=10, ignore_args=(0,1,2,3))
-    def psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior):
+    def psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior,
+                                  psi0=None, psi1=None, psi2=None, Lpsi0=None, Lpsi1=None, Lpsi2=None):
        if isinstance(variational_posterior, variational.NormalPosterior):
-            return rbf_psi_comp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior)
+            return rbf_psi_comp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior,
+                                                          psi0=psi0, psi1=psi1, psi2=psi2, Lpsi0=Lpsi0, Lpsi1=Lpsi1, Lpsi2=Lpsi2)
        elif isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
            return ssrbf_psi_comp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior)
        else:
@@ -35,7 +39,9 @@ class PSICOMP_RBF(Pickleable):
class PSICOMP_Linear(Pickleable):

    @Cache_this(limit=10, ignore_args=(0,))
-    def psicomputations(self, variance, Z, variational_posterior):
+    def psicomputations(self, variance, Z, variational_posterior, return_psi2_n=False):
+        if return_psi2_n:
+            raise NotImplementedError
        if isinstance(variational_posterior, variational.NormalPosterior):
            return linear_psi_comp.psicomputations(variance, Z, variational_posterior)
        elif isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
@@ -44,8 +50,9 @@ class PSICOMP_Linear(Pickleable):
            raise ValueError("unknown distriubtion received for psi-statistics")

    @Cache_this(limit=10, ignore_args=(0,1,2,3))
-    def psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, Z, variational_posterior):
+    def psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, Z, variational_posterior, psi0=None, psi1=None, psi2=None):
        if isinstance(variational_posterior, variational.NormalPosterior):
+            #Should pass psi in
            return linear_psi_comp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, Z, variational_posterior)
        elif isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
            return sslinear_psi_comp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, Z, variational_posterior)
@@ -53,4 +60,4 @@ class PSICOMP_Linear(Pickleable):
            raise ValueError("unknown distriubtion received for psi-statistics")

    def _setup_observers(self):
        pass
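Note: Cache_this(limit=10, ignore_args=(0,1,2,3)) keys its cache on everything except self and the dL_dpsi* arrays, which is one reason the precomputed psi/Lpsi values are passed explicitly rather than recomputed. A toy memoizer illustrating the ignore_args idea (not GPy's actual implementation):

```python
def cache_ignoring_leading_args(n_ignored):
    """Toy memoizer: the cache key skips the first n_ignored arguments,
    mirroring the intent of Cache_this(..., ignore_args=...)."""
    def deco(f):
        store = {}
        def wrap(*args):
            key = tuple(id(a) for a in args[n_ignored:])
            if key not in store:
                store[key] = f(*args)
            return store[key]
        return wrap
    return deco
```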

View file

@@ -8,7 +8,7 @@ The package for the Psi statistics computation of the linear kernel for Bayesian
import numpy as np
from ....util.linalg import tdot

-def psicomputations(variance, Z, variational_posterior):
+def psicomputations(variance, Z, variational_posterior, return_psi2_n=False):
    """
    Compute psi-statistics for ss-linear kernel
    """
@@ -22,7 +22,10 @@ def psicomputations(variance, Z, variational_posterior):
    psi0 = (variance*(np.square(mu)+S)).sum(axis=1)
    psi1 = np.dot(mu,(variance*Z).T)
-    psi2 = np.dot(S.sum(axis=0)*np.square(variance)*Z,Z.T)+ tdot(psi1.T)
+    if return_psi2_n:
+        psi2 = np.dot(S.sum(axis=0)*np.square(variance)*Z,Z.T)+ tdot(psi1.T)
+    else:
+        raise NotImplementedError
    return psi0, psi1, psi2
@@ -40,7 +43,7 @@ def psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, Z, variati
    dL_dmu += 2.*dL_dpsi0_var*mu+np.dot(dL_dpsi1,Z)*variance
    dL_dS += dL_dpsi0_var
    dL_dZ += dL_dpsi1_mu*variance
    return dL_dvar, dL_dZ, dL_dmu, dL_dS

def _psi2computations(dL_dpsi2, variance, Z, mu, S):
@@ -56,7 +59,7 @@ def _psi2computations(dL_dpsi2, variance, Z, mu, S):
    # _psi2_dZ MxQ
    # _psi2_dmu NxQ
    # _psi2_dS NxQ
    variance2 = np.square(variance)
    common_sum = np.dot(mu,(variance*Z).T)
    Z_expect = (np.dot(dL_dpsi2,Z)*Z).sum(axis=0)
@@ -66,12 +69,12 @@ def _psi2computations(dL_dpsi2, variance, Z, mu, S):
    Z1_expect = np.dot(dL_dpsi2T,Z)
    dL_dvar = 2.*S.sum(axis=0)*variance*Z_expect+(common_expect*mu).sum(axis=0)
    dL_dmu = common_expect*variance
    dL_dS = np.empty(S.shape)
    dL_dS[:] = Z_expect*variance2
    dL_dZ = variance2*S.sum(axis=0)*Z1_expect+np.dot(Z2_expect.T,variance*mu)
    return dL_dvar, dL_dmu, dL_dS, dL_dZ
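Note: tdot(A) in GPy.util.linalg computes A.dot(A.T), so tdot(psi1.T) in the psi2 expression above is the sum over n of the outer products of psi1 rows in one BLAS call. A quick check of that identity with toy values:

```python
import numpy as np

N, M = 6, 3
psi1 = np.random.rand(N, M)
outer_sum = sum(np.outer(psi1[n], psi1[n]) for n in range(N))
assert np.allclose(psi1.T.dot(psi1), outer_sum)   # == tdot(psi1.T)
```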

View file

@@ -5,7 +5,7 @@ The module for psi-statistics for RBF kernel
import numpy as np
from GPy.util.caching import Cacher

-def psicomputations(variance, lengthscale, Z, variational_posterior):
+def psicomputations(variance, lengthscale, Z, variational_posterior, return_psi2_n=False):
    """
    Z - MxQ
    mu - NxQ
@@ -21,7 +21,9 @@ def psicomputations(variance, lengthscale, Z, variational_posterior):
    psi0 = np.empty(mu.shape[0])
    psi0[:] = variance
    psi1 = _psi1computations(variance, lengthscale, Z, mu, S)
-    psi2 = _psi2computations(variance, lengthscale, Z, mu, S).sum(axis=0)
+    psi2 = _psi2computations(variance, lengthscale, Z, mu, S)
+    if not return_psi2_n:
+        psi2 = psi2.sum(axis=0)
    return psi0, psi1, psi2

def __psi1computations(variance, lengthscale, Z, mu, S):
@@ -66,11 +68,12 @@ def __psi2computations(variance, lengthscale, Z, mu, S):
    _psi2 = variance*variance*np.exp(_psi2_logdenom[:,None,None]+_psi2_exp1[None,:,:]+_psi2_exp2)
    return _psi2

-def psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior):
+def psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior,
+                              psi0=None, psi1=None, psi2=None, Lpsi0=None, Lpsi1=None, Lpsi2=None):
    ARD = (len(lengthscale)!=1)

-    dvar_psi1, dl_psi1, dZ_psi1, dmu_psi1, dS_psi1 = _psi1compDer(dL_dpsi1, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance)
-    dvar_psi2, dl_psi2, dZ_psi2, dmu_psi2, dS_psi2 = _psi2compDer(dL_dpsi2, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance)
+    dvar_psi1, dl_psi1, dZ_psi1, dmu_psi1, dS_psi1 = _psi1compDer(dL_dpsi1, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance, psi1=psi1, Lpsi1=Lpsi1)
+    dvar_psi2, dl_psi2, dZ_psi2, dmu_psi2, dS_psi2 = _psi2compDer(dL_dpsi2, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance, psi2=psi2, Lpsi2=Lpsi2)

    dL_dvar = np.sum(dL_dpsi0) + dvar_psi1 + dvar_psi2
@@ -84,7 +87,7 @@ def psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscal
    return dL_dvar, dL_dlengscale, dL_dZ, dL_dmu, dL_dS

-def _psi1compDer(dL_dpsi1, variance, lengthscale, Z, mu, S):
+def __psi1compDer(dL_dpsi1, variance, lengthscale, Z, mu, S, psi1=None, Lpsi1=None):
    """
    dL_dpsi1 - NxM
    Z - MxQ
@@ -103,8 +106,10 @@ def _psi1compDer(dL_dpsi1, variance, lengthscale, Z, mu, S):
    lengthscale2 = np.square(lengthscale)

-    _psi1 = _psi1computations(variance, lengthscale, Z, mu, S)
-    Lpsi1 = dL_dpsi1*_psi1
+    if psi1 is None:
+        psi1 = _psi1computations(variance, lengthscale, Z, mu, S)
+    if Lpsi1 is None:
+        Lpsi1 = dL_dpsi1*psi1
    Zmu = Z[None,:,:]-mu[:,None,:] # NxMxQ
    denom = 1./(S+lengthscale2)
    Zmu2_denom = np.square(Zmu)*denom[:,None,:] #NxMxQ
@@ -116,7 +121,7 @@ def _psi1compDer(dL_dpsi1, variance, lengthscale, Z, mu, S):
    return _dL_dvar, _dL_dl, _dL_dZ, _dL_dmu, _dL_dS

-def _psi2compDer(dL_dpsi2, variance, lengthscale, Z, mu, S):
+def __psi2compDer(dL_dpsi2, variance, lengthscale, Z, mu, S, psi2=None, Lpsi2=None):
    """
    Z - MxQ
    mu - NxQ
@@ -137,8 +142,10 @@ def _psi2compDer(dL_dpsi2, variance, lengthscale, Z, mu, S):
    denom = 1./(2*S+lengthscale2)
    denom2 = np.square(denom)

-    _psi2 = _psi2computations(variance, lengthscale, Z, mu, S) # NxMxM
-    Lpsi2 = dL_dpsi2*_psi2 # dL_dpsi2 is MxM, using broadcast to multiply N out
+    if psi2 is None:
+        psi2 = _psi2computations(variance, lengthscale, Z, mu, S) # NxMxM
+    if Lpsi2 is None:
+        Lpsi2 = dL_dpsi2*psi2 # dL_dpsi2 is MxM, using broadcast to multiply N out
    Lpsi2sum = np.einsum('nmo->n',Lpsi2) #N
    Lpsi2Z = np.einsum('nmo,oq->nq',Lpsi2,Z) #NxQ
    Lpsi2Z2 = np.einsum('nmo,oq,oq->nq',Lpsi2,Z,Z) #NxQ
@@ -149,8 +156,14 @@ def _psi2compDer(dL_dpsi2, variance, lengthscale, Z, mu, S):
    _dL_dvar = Lpsi2sum.sum()*2/variance
    _dL_dmu = (-2*denom) * (mu*Lpsi2sum[:,None]-Lpsi2Zhat)
    _dL_dS = (2*np.square(denom))*(np.square(mu)*Lpsi2sum[:,None]-2*mu*Lpsi2Zhat+Lpsi2Zhat2) - denom*Lpsi2sum[:,None]
-    _dL_dZ = -np.einsum('nmo,oq->oq',Lpsi2,Z)/lengthscale2+np.einsum('nmo,oq->mq',Lpsi2,Z)/lengthscale2+ \
-        2*np.einsum('nmo,nq,nq->mq',Lpsi2,mu,denom) - np.einsum('nmo,nq,mq->mq',Lpsi2,denom,Z) - np.einsum('nmo,oq,nq->mq',Lpsi2,Z,denom)
+    _dL_dZ1 = -np.einsum('nmo,oq->oq',Lpsi2,Z)/lengthscale2
+    _dL_dZ2 = np.einsum('nmo,oq->mq',Lpsi2,Z)/lengthscale2
+    _dL_dZ3 = 2*np.einsum('nmo,nq,nq->mq',Lpsi2,mu,denom)
+    _dL_dZ4 = - np.einsum('nmo,nq,mq->mq',Lpsi2,denom,Z)
+    _dL_dZ5 = - np.einsum('nmo,oq,nq->mq',Lpsi2,Z,denom)
+    _dL_dZ = _dL_dZ1 + _dL_dZ2 + _dL_dZ3 + _dL_dZ4 + _dL_dZ5
+    #_dL_dZ = -np.einsum('nmo,oq->oq',Lpsi2,Z)/lengthscale2+np.einsum('nmo,oq->mq',Lpsi2,Z)/lengthscale2+ \
+    #    2*np.einsum('nmo,nq,nq->mq',Lpsi2,mu,denom) - np.einsum('nmo,nq,mq->mq',Lpsi2,denom,Z) - np.einsum('nmo,oq,nq->mq',Lpsi2,Z,denom)
    _dL_dl = 2*lengthscale* ((S/lengthscale2*denom+np.square(mu*denom))*Lpsi2sum[:,None]+(Lpsi2Z2-Lpsi2Z2p)/(2*np.square(lengthscale2))-
                             (2*mu*denom2)*Lpsi2Zhat+denom2*Lpsi2Zhat2).sum(axis=0)
@@ -158,3 +171,5 @@ def _psi2compDer(dL_dpsi2, variance, lengthscale, Z, mu, S):
_psi1computations = Cacher(__psi1computations, limit=5)
_psi2computations = Cacher(__psi2computations, limit=5)
+_psi1compDer = Cacher(__psi1compDer, limit=5)
+_psi2compDer = Cacher(__psi2compDer, limit=5)
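Note: the _dL_dZ refactor only splits one long expression into five named einsum terms; the sum is numerically identical to the commented-out original. A toy-shape sketch of the split:

```python
import numpy as np

N, M, Q = 4, 3, 2
rng = np.random.RandomState(0)
Lpsi2 = rng.rand(N, M, M)
Z, mu, denom = rng.rand(M, Q), rng.rand(N, Q), rng.rand(N, Q)
lengthscale2 = rng.rand(Q) + 1.

terms = [-np.einsum('nmo,oq->oq', Lpsi2, Z) / lengthscale2,
         np.einsum('nmo,oq->mq', Lpsi2, Z) / lengthscale2,
         2 * np.einsum('nmo,nq,nq->mq', Lpsi2, mu, denom),
         -np.einsum('nmo,nq,mq->mq', Lpsi2, denom, Z),
         -np.einsum('nmo,oq,nq->mq', Lpsi2, Z, denom)]
dL_dZ = sum(terms)          # same value as the original one-liner
assert dL_dZ.shape == (M, Q)
```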

View file

@@ -9,7 +9,7 @@ import numpy as np
try:
    from scipy import weave

    def _psicomputations(variance, lengthscale, Z, variational_posterior):
        """
        Z - MxQ
@@ -23,7 +23,7 @@ try:
        mu = variational_posterior.mean
        S = variational_posterior.variance
        gamma = variational_posterior.binary_prob

        N,M,Q = mu.shape[0],Z.shape[0],mu.shape[1]

        l2 = np.square(lengthscale)
        log_denom1 = np.log(S/l2+1)
@@ -35,13 +35,13 @@ try:
        psi0[:] = variance
        psi1 = np.empty((N,M))
        psi2n = np.empty((N,M,M))

        from ....util.misc import param_to_array
        S = param_to_array(S)
        mu = param_to_array(mu)
        gamma = param_to_array(gamma)
        Z = param_to_array(Z)

        support_code = """
        #include <math.h>
        """
@@ -56,11 +56,11 @@ try:
                        double lq = l2(q);
                        double Zm1q = Z(m1,q);
                        double Zm2q = Z(m2,q);

                        if(m2==0) {
                            // Compute Psi_1
                            double muZ = mu(n,q)-Z(m1,q);

                            double psi1_exp1 = log_gamma(n,q) - (muZ*muZ/(Snq+lq) +log_denom1(n,q))/2.;
                            double psi1_exp2 = log_gamma1(n,q) -Zm1q*Zm1q/(2.*lq);
                            log_psi1 += (psi1_exp1>psi1_exp2)?psi1_exp1+log1p(exp(psi1_exp2-psi1_exp1)):psi1_exp2+log1p(exp(psi1_exp1-psi1_exp2));
@@ -69,10 +69,10 @@ try:
                        double muZhat = mu(n,q) - (Zm1q+Zm2q)/2.;
                        double Z2 = Zm1q*Zm1q+ Zm2q*Zm2q;
                        double dZ = Zm1q - Zm2q;

                        double psi2_exp1 = dZ*dZ/(-4.*lq)-muZhat*muZhat/(2.*Snq+lq) - log_denom2(n,q)/2. + log_gamma(n,q);
                        double psi2_exp2 = log_gamma1(n,q) - Z2/(2.*lq);
                        log_psi2_n += (psi2_exp1>psi2_exp2)?psi2_exp1+log1p(exp(psi2_exp2-psi2_exp1)):psi2_exp2+log1p(exp(psi2_exp1-psi2_exp2));
                    }
                    double exp_psi2_n = exp(log_psi2_n);
                    psi2n(n,m1,m2) = variance*variance*exp_psi2_n;
@@ -83,18 +83,18 @@ try:
            }
        """
        weave.inline(code, support_code=support_code, arg_names=['psi1','psi2n','N','M','Q','variance','l2','Z','mu','S','gamma','log_denom1','log_denom2','log_gamma','log_gamma1'], type_converters=weave.converters.blitz)

        psi2 = psi2n.sum(axis=0)
        return psi0,psi1,psi2,psi2n
    from GPy.util.caching import Cacher
    psicomputations = Cacher(_psicomputations, limit=1)

    def psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior):
        ARD = (len(lengthscale)!=1)

        _,psi1,_,psi2n = psicomputations(variance, lengthscale, Z, variational_posterior)
        mu = variational_posterior.mean
        S = variational_posterior.variance
        gamma = variational_posterior.binary_prob
@@ -105,7 +105,7 @@ try:
        log_gamma = np.log(gamma)
        log_gamma1 = np.log(1.-gamma)
        variance = float(variance)

        dvar = np.zeros(1)
        dmu = np.zeros((N,Q))
        dS = np.zeros((N,Q))
@@ -113,13 +113,13 @@ try:
        dl = np.zeros(Q)
        dZ = np.zeros((M,Q))

        dvar += np.sum(dL_dpsi0)

        from ....util.misc import param_to_array
        S = param_to_array(S)
        mu = param_to_array(mu)
        gamma = param_to_array(gamma)
        Z = param_to_array(Z)

        support_code = """
        #include <math.h>
        """
@@ -136,16 +136,16 @@ try:
                        double Zm2q = Z(m2,q);
                        double gnq = gamma(n,q);
                        double mu_nq = mu(n,q);

                        if(m2==0) {
                            // Compute Psi_1
                            double lpsi1 = psi1(n,m1)*dL_dpsi1(n,m1);
                            if(q==0) {dvar(0) += lpsi1/variance;}

                            double Zmu = Zm1q - mu_nq;
                            double denom = Snq+lq;
                            double Zmu2_denom = Zmu*Zmu/denom;

                            double exp1 = log_gamma(n,q)-(Zmu*Zmu/(Snq+lq)+log_denom1(n,q))/(2.);
                            double exp2 = log_gamma1(n,q)-Zm1q*Zm1q/(2.*lq);
                            double d_exp1,d_exp2;
@@ -157,7 +157,7 @@ try:
                                d_exp2 = 1.;
                            }
                            double exp_sum = d_exp1+d_exp2;

                            dmu(n,q) += lpsi1*Zmu*d_exp1/(denom*exp_sum);
                            dS(n,q) += lpsi1*(Zmu2_denom-1.)*d_exp1/(denom*exp_sum)/2.;
                            dgamma(n,q) += lpsi1*(d_exp1/gnq-d_exp2/(1.-gnq))/exp_sum;
@@ -167,13 +167,13 @@ try:
                        // Compute Psi_2
                        double lpsi2 = psi2n(n,m1,m2)*dL_dpsi2(m1,m2);
                        if(q==0) {dvar(0) += lpsi2*2/variance;}

                        double dZm1m2 = Zm1q - Zm2q;
                        double Z2 = Zm1q*Zm1q+Zm2q*Zm2q;
                        double muZhat = mu_nq - (Zm1q + Zm2q)/2.;
                        double denom = 2.*Snq+lq;
                        double muZhat2_denom = muZhat*muZhat/denom;

                        double exp1 = dZm1m2*dZm1m2/(-4.*lq)-muZhat*muZhat/(2.*Snq+lq) - log_denom2(n,q)/2. + log_gamma(n,q);
                        double exp2 = log_gamma1(n,q) - Z2/(2.*lq);
                        double d_exp1,d_exp2;
@@ -185,23 +185,23 @@ try:
                            d_exp2 = 1.;
                        }
                        double exp_sum = d_exp1+d_exp2;

                        dmu(n,q) += -2.*lpsi2*muZhat/denom*d_exp1/exp_sum;
                        dS(n,q) += lpsi2*(2.*muZhat2_denom-1.)/denom*d_exp1/exp_sum;
                        dgamma(n,q) += lpsi2*(d_exp1/gnq-d_exp2/(1.-gnq))/exp_sum;
                        dl(q) += lpsi2*(((Snq/lq+muZhat2_denom)/denom+dZm1m2*dZm1m2/(4.*lq*lq))*d_exp1+Z2/(2.*lq*lq)*d_exp2)/exp_sum;
                        dZ(m1,q) += 2.*lpsi2*((muZhat/denom-dZm1m2/(2*lq))*d_exp1-Zm1q/lq*d_exp2)/exp_sum;
                    }
                }
            }
        """
        weave.inline(code, support_code=support_code, arg_names=['dL_dpsi1','dL_dpsi2','psi1','psi2n','N','M','Q','variance','l2','Z','mu','S','gamma','log_denom1','log_denom2','log_gamma','log_gamma1','dvar','dl','dmu','dS','dgamma','dZ'], type_converters=weave.converters.blitz)

        dl *= 2.*lengthscale
        if not ARD:
            dl = dl.sum()

        return dvar, dl, dZ, dmu, dS, dgamma
except:
@@ -219,13 +219,13 @@ except:
        mu = variational_posterior.mean
        S = variational_posterior.variance
        gamma = variational_posterior.binary_prob

        psi0 = np.empty(mu.shape[0])
        psi0[:] = variance
        psi1 = _psi1computations(variance, lengthscale, Z, mu, S, gamma)
        psi2 = _psi2computations(variance, lengthscale, Z, mu, S, gamma)
        return psi0, psi1, psi2

    def _psi1computations(variance, lengthscale, Z, mu, S, gamma):
        """
        Z - MxQ
@@ -236,9 +236,9 @@ except:
        # here are the "statistics" for psi1
        # Produced intermediate results:
        # _psi1 NxM

        lengthscale2 = np.square(lengthscale)

        # psi1
        _psi1_denom = S[:, None, :] / lengthscale2 + 1. # Nx1xQ
        _psi1_denom_sqrt = np.sqrt(_psi1_denom) #Nx1xQ
@@ -251,9 +251,9 @@ except:
        _psi1_exponent = _psi1_exponent_max+np.log(np.exp(_psi1_exponent1-_psi1_exponent_max) + np.exp(_psi1_exponent2-_psi1_exponent_max)) #NxMxQ
        _psi1_exp_sum = _psi1_exponent.sum(axis=-1) #NxM
        _psi1 = variance * np.exp(_psi1_exp_sum) # NxM

        return _psi1

    def _psi2computations(variance, lengthscale, Z, mu, S, gamma):
        """
        Z - MxQ
@@ -264,14 +264,14 @@ except:
        # here are the "statistics" for psi2
        # Produced intermediate results:
        # _psi2 MxM

        lengthscale2 = np.square(lengthscale)

        _psi2_Zhat = 0.5 * (Z[:, None, :] + Z[None, :, :]) # M,M,Q
        _psi2_Zdist = 0.5 * (Z[:, None, :] - Z[None, :, :]) # M,M,Q
        _psi2_Zdist_sq = np.square(_psi2_Zdist / lengthscale) # M,M,Q
        _psi2_Z_sq_sum = (np.square(Z[:,None,:])+np.square(Z[None,:,:]))/lengthscale2 # MxMxQ

        # psi2
        _psi2_denom = 2.*S[:, None, None, :] / lengthscale2 + 1. # Nx1x1xQ
        _psi2_denom_sqrt = np.sqrt(_psi2_denom)
@@ -284,28 +284,28 @@ except:
        _psi2_exponent = _psi2_exponent_max+np.log(np.exp(_psi2_exponent1-_psi2_exponent_max) + np.exp(_psi2_exponent2-_psi2_exponent_max))
        _psi2_exp_sum = _psi2_exponent.sum(axis=-1) #NxM
        _psi2 = variance*variance * (np.exp(_psi2_exp_sum).sum(axis=0)) # MxM

        return _psi2

    def psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior):
        ARD = (len(lengthscale)!=1)

        dvar_psi1, dl_psi1, dZ_psi1, dmu_psi1, dS_psi1, dgamma_psi1 = _psi1compDer(dL_dpsi1, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
        dvar_psi2, dl_psi2, dZ_psi2, dmu_psi2, dS_psi2, dgamma_psi2 = _psi2compDer(dL_dpsi2, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)

        dL_dvar = np.sum(dL_dpsi0) + dvar_psi1 + dvar_psi2
        dL_dlengscale = dl_psi1 + dl_psi2
        if not ARD:
            dL_dlengscale = dL_dlengscale.sum()

        dL_dgamma = dgamma_psi1 + dgamma_psi2
        dL_dmu = dmu_psi1 + dmu_psi2
        dL_dS = dS_psi1 + dS_psi2
        dL_dZ = dZ_psi1 + dZ_psi2

        return dL_dvar, dL_dlengscale, dL_dZ, dL_dmu, dL_dS, dL_dgamma

    def _psi1compDer(dL_dpsi1, variance, lengthscale, Z, mu, S, gamma):
        """
        dL_dpsi1 - NxM
@@ -322,9 +322,9 @@ except:
        # _dL_dgamma NxQ
        # _dL_dmu NxQ
        # _dL_dS NxQ

        lengthscale2 = np.square(lengthscale)

        # psi1
        _psi1_denom = S / lengthscale2 + 1. # NxQ
        _psi1_denom_sqrt = np.sqrt(_psi1_denom) #NxQ
@@ -346,9 +346,9 @@ except:
        _dL_dS = np.einsum('nm,nmq,nmq,nq,nmq->nq',dL_dpsi1,_psi1_q,_psi1_exp_dist_sq,_psi1_common,(_psi1_dist_sq-1.))/2. # NxQ
        _dL_dZ = np.einsum('nm,nmq,nmq->mq',dL_dpsi1,_psi1_q, (- _psi1_common[:,None,:] * _psi1_dist * _psi1_exp_dist_sq - (1-gamma[:,None,:])/lengthscale2*Z[None,:,:]*_psi1_exp_Z))
        _dL_dlengthscale = lengthscale* np.einsum('nm,nmq,nmq->q',dL_dpsi1,_psi1_q,(_psi1_common[:,None,:]*(S[:,None,:]/lengthscale2+_psi1_dist_sq)*_psi1_exp_dist_sq + (1-gamma[:,None,:])*np.square(Z[None,:,:]/lengthscale2)*_psi1_exp_Z))

        return _dL_dvariance, _dL_dlengthscale, _dL_dZ, _dL_dmu, _dL_dS, _dL_dgamma

    def _psi2compDer(dL_dpsi2, variance, lengthscale, Z, mu, S, gamma):
        """
        Z - MxQ
@@ -365,14 +365,14 @@ except:
        # _dL_dgamma NxQ
        # _dL_dmu NxQ
        # _dL_dS NxQ

        lengthscale2 = np.square(lengthscale)

        _psi2_Zhat = 0.5 * (Z[:, None, :] + Z[None, :, :]) # M,M,Q
        _psi2_Zdist = 0.5 * (Z[:, None, :] - Z[None, :, :]) # M,M,Q
        _psi2_Zdist_sq = np.square(_psi2_Zdist / lengthscale) # M,M,Q
        _psi2_Z_sq_sum = (np.square(Z[:,None,:])+np.square(Z[None,:,:]))/lengthscale2 # MxMxQ

        # psi2
        _psi2_denom = 2.*S / lengthscale2 + 1. # NxQ
        _psi2_denom_sqrt = np.sqrt(_psi2_denom)
@@ -384,7 +384,7 @@ except:
        _psi2_exponent_max = np.maximum(_psi2_exponent1, _psi2_exponent2)
        _psi2_exponent = _psi2_exponent_max+np.log(np.exp(_psi2_exponent1-_psi2_exponent_max) + np.exp(_psi2_exponent2-_psi2_exponent_max))
        _psi2_exp_sum = _psi2_exponent.sum(axis=-1) #NxM

        _psi2_q = variance*variance * np.exp(_psi2_exp_sum[:,:,:,None]-_psi2_exponent) # NxMxMxQ
        _psi2_exp_dist_sq = np.exp(-_psi2_Zdist_sq -_psi2_mudist_sq) # NxMxMxQ
        _psi2_exp_Z = np.exp(-0.5*_psi2_Z_sq_sum) # MxMxQ
        _psi2 = variance*variance * (np.exp(_psi2_exp_sum).sum(axis=0)) # MxM
@@ -394,5 +394,5 @@ except:
        _dL_dS = np.einsum('mo,nmoq,nq,nmoq,nmoq->nq',dL_dpsi2,_psi2_q, _psi2_common, (2.*_psi2_mudist_sq-1.), _psi2_exp_dist_sq)
        _dL_dZ = 2.*np.einsum('mo,nmoq,nmoq->mq',dL_dpsi2,_psi2_q,(_psi2_common[:,None,None,:]*(-_psi2_Zdist*_psi2_denom[:,None,None,:]+_psi2_mudist)*_psi2_exp_dist_sq - (1-gamma[:,None,None,:])*Z[:,None,:]/lengthscale2*_psi2_exp_Z))
        _dL_dlengthscale = 2.*lengthscale* np.einsum('mo,nmoq,nmoq->q',dL_dpsi2,_psi2_q,(_psi2_common[:,None,None,:]*(S[:,None,None,:]/lengthscale2+_psi2_Zdist_sq*_psi2_denom[:,None,None,:]+_psi2_mudist_sq)*_psi2_exp_dist_sq+(1-gamma[:,None,None,:])*_psi2_Z_sq_sum*0.5/lengthscale2*_psi2_exp_Z))

        return _dL_dvariance, _dL_dlengthscale, _dL_dZ, _dL_dmu, _dL_dS, _dL_dgamma
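Note: the ternary expressions in the weave C kernels above are the standard two-term log-sum-exp, evaluated from the larger exponent so the exp never overflows. A hedged Python sketch of the same trick:

```python
import numpy as np

def logsumexp2(a, b):
    """Numerically stable log(exp(a) + exp(b))."""
    hi, lo = (a, b) if a > b else (b, a)
    return hi + np.log1p(np.exp(lo - hi))

# naive exp(1000) overflows; the stable form does not
assert np.isclose(logsumexp2(1000.0, 1000.0), 1000.0 + np.log(2.0))
```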

View file

@@ -59,16 +59,22 @@ class RBF(Stationary):
        return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior)[1]

    def psi2(self, Z, variational_posterior):
-        return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior)[2]
+        return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior, return_psi2_n=False)[2]
+
+    def psi2n(self, Z, variational_posterior):
+        return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior, return_psi2_n=True)[2]

-    def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
-        dL_dvar, dL_dlengscale = self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)[:2]
+    def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior,
+                                      psi0=None, psi1=None, psi2=None, Lpsi0=None, Lpsi1=None, Lpsi2=None):
+        dL_dvar, dL_dlengscale = self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior, psi0=psi0, psi1=psi1, psi2=psi2, Lpsi0=Lpsi0, Lpsi1=Lpsi1, Lpsi2=Lpsi2)[:2]
        self.variance.gradient = dL_dvar
        self.lengthscale.gradient = dL_dlengscale

-    def gradients_Z_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
-        return self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)[2]
+    def gradients_Z_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior,
+                                 psi0=None, psi1=None, psi2=None, Lpsi0=None, Lpsi1=None, Lpsi2=None):
+        return self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior, psi0=psi0, psi1=psi1, psi2=psi2, Lpsi0=Lpsi0, Lpsi1=Lpsi1, Lpsi2=Lpsi2)[2]

-    def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
-        return self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)[3:]
+    def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior,
+                                  psi0=None, psi1=None, psi2=None, Lpsi0=None, Lpsi1=None, Lpsi2=None):
+        return self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior, psi0=psi0, psi1=psi1, psi2=psi2, Lpsi0=Lpsi0, Lpsi1=Lpsi1, Lpsi2=Lpsi2)[3:]
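Note: a hedged usage sketch of the new RBF.psi2n accessor (assuming this branch of GPy is importable; the NormalPosterior import path and constructor arguments are taken from GPy's variational module and may differ between versions):

```python
import numpy as np
import GPy

N, M, Q = 10, 4, 2
kern = GPy.kern.RBF(Q)
Z = np.random.rand(M, Q)
qX = GPy.core.parameterization.variational.NormalPosterior(
    np.random.rand(N, Q), 0.1 * np.ones((N, Q)))

psi2n = kern.psi2n(Z, qX)   # NxMxM: one block per data point
psi2 = kern.psi2(Z, qX)     # MxM: N summed out
assert np.allclose(psi2, psi2n.sum(0))
```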

View file

@@ -9,6 +9,7 @@ from ..inference.latent_function_inference.var_dtc_parallel import VarDTC_miniba
import logging
from GPy.models.sparse_gp_minibatch import SparseGPMiniBatch
from GPy.core.parameterization.param import Param
+from GPy.core.parameterization.observable_array import ObsAr

class BayesianGPLVMMiniBatch(SparseGPMiniBatch):
    """
@@ -80,46 +81,10 @@ class BayesianGPLVMMiniBatch(SparseGPMiniBatch):
        """Get the gradients of the posterior distribution of X in its specific form."""
        return X.mean.gradient, X.variance.gradient

-    def _inner_parameters_changed(self, kern, X, Z, likelihood, Y, Y_metadata, Lm=None, dL_dKmm=None, subset_indices=None, **kw):
-        posterior, log_marginal_likelihood, grad_dict, current_values, value_indices = super(BayesianGPLVMMiniBatch, self)._inner_parameters_changed(kern, X, Z, likelihood, Y, Y_metadata, Lm=Lm, dL_dKmm=dL_dKmm, subset_indices=subset_indices, **kw)
-
-        if self.has_uncertain_inputs():
-            current_values['meangrad'], current_values['vargrad'] = self.kern.gradients_qX_expectations(
-                                            variational_posterior=X,
-                                            Z=Z, dL_dpsi0=grad_dict['dL_dpsi0'],
-                                            dL_dpsi1=grad_dict['dL_dpsi1'],
-                                            dL_dpsi2=grad_dict['dL_dpsi2'])
-        else:
-            current_values['Xgrad'] = self.kern.gradients_X(grad_dict['dL_dKnm'], X, Z)
-            current_values['Xgrad'] += self.kern.gradients_X_diag(grad_dict['dL_dKdiag'], X)
-            if subset_indices is not None:
-                value_indices['Xgrad'] = subset_indices['samples']
-
-        kl_fctr = self.kl_factr
-        if self.has_uncertain_inputs():
-            if self.missing_data:
-                d = self.output_dim
-                log_marginal_likelihood -= kl_fctr*self.variational_prior.KL_divergence(X)/d
-            else:
-                log_marginal_likelihood -= kl_fctr*self.variational_prior.KL_divergence(X)
-
-            # Subsetting Variational Posterior objects, makes the gradients
-            # empty. We need them to be 0 though:
-            X.mean.gradient[:] = 0
-            X.variance.gradient[:] = 0
-
-            self.variational_prior.update_gradients_KL(X)
-
-            if self.missing_data:
-                current_values['meangrad'] += kl_fctr*X.mean.gradient/d
-                current_values['vargrad'] += kl_fctr*X.variance.gradient/d
-            else:
-                current_values['meangrad'] += kl_fctr*X.mean.gradient
-                current_values['vargrad'] += kl_fctr*X.variance.gradient
-
-            if subset_indices is not None:
-                value_indices['meangrad'] = subset_indices['samples']
-                value_indices['vargrad'] = subset_indices['samples']
-        return posterior, log_marginal_likelihood, grad_dict, current_values, value_indices
+    def _inner_parameters_changed(self, kern, X, Z, likelihood, Y, Y_metadata, Lm=None, dL_dKmm=None, psi0=None, psi1=None, psi2=None, **kw):
+        posterior, log_marginal_likelihood, grad_dict = super(BayesianGPLVMMiniBatch, self)._inner_parameters_changed(kern, X, Z, likelihood, Y, Y_metadata, Lm=Lm, dL_dKmm=dL_dKmm,
+                                                                                                                      psi0=psi0, psi1=psi1, psi2=psi2, **kw)
+        return posterior, log_marginal_likelihood, grad_dict

    def _outer_values_update(self, full_values):
        """
@@ -128,20 +93,46 @@ class BayesianGPLVMMiniBatch(SparseGPMiniBatch):
        """
        super(BayesianGPLVMMiniBatch, self)._outer_values_update(full_values)
        if self.has_uncertain_inputs():
-            self.X.mean.gradient = full_values['meangrad']
-            self.X.variance.gradient = full_values['vargrad']
+            meangrad_tmp, vargrad_tmp = self.kern.gradients_qX_expectations(
+                                            variational_posterior=self.X,
+                                            Z=self.Z, dL_dpsi0=full_values['dL_dpsi0'],
+                                            dL_dpsi1=full_values['dL_dpsi1'],
+                                            dL_dpsi2=full_values['dL_dpsi2'],
+                                            psi0=self.psi0, psi1=self.psi1, psi2=self.psi2)
+
+            kl_fctr = self.kl_factr
+            self.X.mean.gradient[:] = 0
+            self.X.variance.gradient[:] = 0
+            self.variational_prior.update_gradients_KL(self.X)
+
+            if self.missing_data or not self.stochastics:
+                self.X.mean.gradient = kl_fctr*self.X.mean.gradient
+                self.X.variance.gradient = kl_fctr*self.X.variance.gradient
+            else:
+                d = self.output_dim
+                self.X.mean.gradient = kl_fctr*self.X.mean.gradient*self.stochastics.batchsize/d
+                self.X.variance.gradient = kl_fctr*self.X.variance.gradient*self.stochastics.batchsize/d
+
+            self.X.mean.gradient += meangrad_tmp
+            self.X.variance.gradient += vargrad_tmp
        else:
-            self.X.gradient = full_values['Xgrad']
+            self.X.gradient = self.kern.gradients_X(full_values['dL_dKnm'], self.X, self.Z)
+            self.X.gradient += self.kern.gradients_X_diag(full_values['dL_dKdiag'], self.X)

    def _outer_init_full_values(self):
-        if self.has_uncertain_inputs():
-            return dict(meangrad=np.zeros(self.X.mean.shape),
-                        vargrad=np.zeros(self.X.variance.shape))
-        else:
-            return dict(Xgrad=np.zeros(self.X.shape))
+        full_values = super(BayesianGPLVMMiniBatch, self)._outer_init_full_values()
+        return full_values

    def parameters_changed(self):
        super(BayesianGPLVMMiniBatch,self).parameters_changed()
+
+        kl_fctr = self.kl_factr
+        if self.missing_data or not self.stochastics:
+            self._log_marginal_likelihood -= kl_fctr*self.variational_prior.KL_divergence(self.X)
+        elif self.stochastics:
+            d = self.output_dim
+            self._log_marginal_likelihood -= kl_fctr*self.variational_prior.KL_divergence(self.X)*self.stochastics.batchsize/d
+
        if isinstance(self.inference_method, VarDTC_minibatch):
            return
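Note: the KL term is now applied once per outer update and rescaled by batchsize/output_dim when running stochastically over output dimensions, so the stochastic objective matches the full one in expectation. A hedged sketch of that scaling rule (the helper name is illustrative, not from the commit):

```python
def scaled_kl_term(kl_full, kl_factr, missing_data, stochastics,
                   batchsize=None, output_dim=None):
    """Return the KL contribution to subtract from the log marginal."""
    if missing_data or not stochastics:
        return kl_factr * kl_full
    # each step sees batchsize of output_dim dimensions
    return kl_factr * kl_full * batchsize / float(output_dim)
```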

View file

@ -63,10 +63,10 @@ class SparseGPMiniBatch(SparseGP):
if stochastic and missing_data: if stochastic and missing_data:
self.missing_data = True self.missing_data = True
self.stochastics = SparseGPStochastics(self, batchsize) self.stochastics = SparseGPStochastics(self, batchsize, self.missing_data)
elif stochastic and not missing_data: elif stochastic and not missing_data:
self.missing_data = False self.missing_data = False
self.stochastics = SparseGPStochastics(self, batchsize) self.stochastics = SparseGPStochastics(self, batchsize, self.missing_data)
elif missing_data: elif missing_data:
self.missing_data = True self.missing_data = True
self.stochastics = SparseGPMissing(self) self.stochastics = SparseGPMissing(self)
@ -81,7 +81,7 @@ class SparseGPMiniBatch(SparseGP):
def has_uncertain_inputs(self): def has_uncertain_inputs(self):
return isinstance(self.X, VariationalPosterior) return isinstance(self.X, VariationalPosterior)
def _inner_parameters_changed(self, kern, X, Z, likelihood, Y, Y_metadata, Lm=None, dL_dKmm=None, subset_indices=None, **kwargs): def _inner_parameters_changed(self, kern, X, Z, likelihood, Y, Y_metadata, Lm=None, dL_dKmm=None, psi0=None, psi1=None, psi2=None, **kwargs):
""" """
This is the standard part, which usually belongs in parameters_changed. This is the standard part, which usually belongs in parameters_changed.
@@ -100,47 +100,13 @@ class SparseGPMiniBatch(SparseGP):
         like them into this dictionary for inner use of the indices inside the
         algorithm.
         """
-        try:
-            posterior, log_marginal_likelihood, grad_dict = self.inference_method.inference(kern, X, Z, likelihood, Y, Y_metadata, Lm=Lm, dL_dKmm=None, **kwargs)
-        except:
-            posterior, log_marginal_likelihood, grad_dict = self.inference_method.inference(kern, X, Z, likelihood, Y, Y_metadata)
-        current_values = {}
-        likelihood.update_gradients(grad_dict['dL_dthetaL'])
-        current_values['likgrad'] = likelihood.gradient.copy()
-        if subset_indices is None:
-            subset_indices = {}
-        if isinstance(X, VariationalPosterior):
-            #gradients wrt kernel
-            dL_dKmm = grad_dict['dL_dKmm']
-            kern.update_gradients_full(dL_dKmm, Z, None)
-            current_values['kerngrad'] = kern.gradient.copy()
-            kern.update_gradients_expectations(variational_posterior=X,
-                                               Z=Z,
-                                               dL_dpsi0=grad_dict['dL_dpsi0'],
-                                               dL_dpsi1=grad_dict['dL_dpsi1'],
-                                               dL_dpsi2=grad_dict['dL_dpsi2'])
-            current_values['kerngrad'] += kern.gradient
-            #gradients wrt Z
-            current_values['Zgrad'] = kern.gradients_X(dL_dKmm, Z)
-            current_values['Zgrad'] += kern.gradients_Z_expectations(
-                               grad_dict['dL_dpsi0'],
-                               grad_dict['dL_dpsi1'],
-                               grad_dict['dL_dpsi2'],
-                               Z=Z,
-                               variational_posterior=X)
-        else:
-            #gradients wrt kernel
-            kern.update_gradients_diag(grad_dict['dL_dKdiag'], X)
-            current_values['kerngrad'] = kern.gradient.copy()
-            kern.update_gradients_full(grad_dict['dL_dKnm'], X, Z)
-            current_values['kerngrad'] += kern.gradient
-            kern.update_gradients_full(grad_dict['dL_dKmm'], Z, None)
-            current_values['kerngrad'] += kern.gradient
-            #gradients wrt Z
-            current_values['Zgrad'] = kern.gradients_X(grad_dict['dL_dKmm'], Z)
-            current_values['Zgrad'] += kern.gradients_X(grad_dict['dL_dKnm'].T, Z, X)
-        return posterior, log_marginal_likelihood, grad_dict, current_values, subset_indices
+        if psi2 is None:
+            psi2_sum_n = None
+        else:
+            psi2_sum_n = psi2.sum(axis=0)
+        posterior, log_marginal_likelihood, grad_dict = self.inference_method.inference(kern, X, Z, likelihood, Y, Y_metadata, Lm=Lm,
+                                                            dL_dKmm=dL_dKmm, psi0=psi0, psi1=psi1, psi2=psi2_sum_n, **kwargs)
+        return posterior, log_marginal_likelihood, grad_dict
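The reworked method now takes precomputed psi statistics and, notably, receives psi2 with its data-point axis intact (N x M x M), summing it out only at the call into inference. A minimal NumPy sketch of the relationship (illustrative shapes, not GPy API):

    import numpy as np
    N, M = 5, 3
    psi2_n = np.random.rand(N, M, M)      # one MxM slice per data point
    psi2 = psi2_n.sum(axis=0)             # the classic summed MxM statistic
    assert psi2.shape == (M, M)
    rows = np.array([0, 2, 4])            # e.g. the non-NaN rows of a block
    psi2_rows = psi2_n[rows].sum(axis=0)  # same statistic for a row subset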
     def _inner_take_over_or_update(self, full_values=None, current_values=None, value_indices=None):
         """
@@ -174,7 +140,10 @@ class SparseGPMiniBatch(SparseGP):
             else:
                 index = slice(None)
             if key in full_values:
-                full_values[key][index] += current_values[key]
+                try:
+                    full_values[key][index] += current_values[key]
+                except:
+                    full_values[key] += current_values[key]
             else:
                 full_values[key] = current_values[key]
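The try/except added above guards the accumulation for values whose full-size array cannot take the per-sample row index (gradients without a data-point axis); those fall back to whole-array accumulation. A self-contained sketch — keys, shapes and the index map are illustrative only:

    import numpy as np
    full = {'dL_dKnm': np.zeros((10, 3)), 'dL_dthetaL': np.zeros(1)}
    cur = {'dL_dKnm': np.ones((4, 3)), 'dL_dthetaL': np.ones(1)}
    samples = np.array([0, 2, 5, 7])               # minibatch rows
    value_indices = {'dL_dKnm': samples, 'dL_dthetaL': samples}
    for key in cur:
        index = value_indices.get(key, slice(None))
        try:
            full[key][index] += cur[key]           # row-wise write
        except (IndexError, ValueError):
            full[key] += cur[key]                  # whole-array fallback
    assert full['dL_dKnm'][2, 0] == 1.0 and full['dL_dthetaL'][0] == 1.0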
@@ -193,9 +162,43 @@ class SparseGPMiniBatch(SparseGP):
         Here you put the values, which were collected before in the right places.
         E.g. set the gradients of parameters, etc.
         """
-        self.likelihood.gradient = full_values['likgrad']
-        self.kern.gradient = full_values['kerngrad']
-        self.Z.gradient = full_values['Zgrad']
+        if self.has_uncertain_inputs():
+            #gradients wrt kernel
+            dL_dKmm = full_values['dL_dKmm']
+            self.kern.update_gradients_full(dL_dKmm, self.Z, None)
+            kgrad = self.kern.gradient.copy()
+            self.kern.update_gradients_expectations(
+                variational_posterior=self.X,
+                Z=self.Z, dL_dpsi0=full_values['dL_dpsi0'],
+                dL_dpsi1=full_values['dL_dpsi1'],
+                dL_dpsi2=full_values['dL_dpsi2'],
+                psi0=self.psi0, psi1=self.psi1, psi2=self.psi2)
+            self.kern.gradient += kgrad
+            #gradients wrt Z
+            self.Z.gradient = self.kern.gradients_X(dL_dKmm, self.Z)
+            self.Z.gradient += self.kern.gradients_Z_expectations(
+                variational_posterior=self.X,
+                Z=self.Z, dL_dpsi0=full_values['dL_dpsi0'],
+                dL_dpsi1=full_values['dL_dpsi1'],
+                dL_dpsi2=full_values['dL_dpsi2'],
+                psi0=self.psi0, psi1=self.psi1, psi2=self.psi2)
+        else:
+            #gradients wrt kernel
+            self.kern.update_gradients_diag(full_values['dL_dKdiag'], self.X)
+            kgrad = self.kern.gradient.copy()
+            self.kern.update_gradients_full(full_values['dL_dKnm'], self.X, self.Z)
+            kgrad += self.kern.gradient
+            self.kern.update_gradients_full(full_values['dL_dKmm'], self.Z, None)
+            self.kern.gradient += kgrad
+            #kgrad += self.kern.gradient
+            #gradients wrt Z
+            self.Z.gradient = self.kern.gradients_X(full_values['dL_dKmm'], self.Z)
+            self.Z.gradient += self.kern.gradients_X(full_values['dL_dKnm'].T, self.Z, self.X)
+        self.likelihood.update_gradients(full_values['dL_dthetaL'])
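The copy-and-add pattern above reflects a convention of GPy's kernel API: each update_gradients_* call overwrites kern.gradient rather than accumulating into it, so combining several contributions needs an explicit copy. A toy stand-in (hypothetical class, not the real kernel) making the pattern concrete:

    class ToyKern(object):
        def __init__(self):
            self.gradient = 0.0
        def update_gradients_diag(self):  # hypothetical no-arg stand-ins for
            self.gradient = 1.0           # the overwrite-on-call convention
        def update_gradients_full(self):
            self.gradient = 2.0

    kern = ToyKern()
    kern.update_gradients_diag()
    kgrad = kern.gradient                 # keep the first contribution
    kern.update_gradients_full()
    kern.gradient += kgrad                # total 3.0, not just the last call
    assert kern.gradient == 3.0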
     def _outer_init_full_values(self):
         """
@@ -210,7 +213,15 @@ class SparseGPMiniBatch(SparseGP):
         to initialize the gradients for the mean and the variance in order to
         have the full gradient for indexing)
         """
-        return {}
+        retd = dict(dL_dKmm=np.zeros((self.Z.shape[0], self.Z.shape[0])))
+        if self.has_uncertain_inputs():
+            retd.update(dict(dL_dpsi0=np.zeros(self.X.shape[0]),
+                             dL_dpsi1=np.zeros((self.X.shape[0], self.Z.shape[0])),
+                             dL_dpsi2=np.zeros((self.X.shape[0], self.Z.shape[0], self.Z.shape[0]))))
+        else:
+            retd.update({'dL_dKdiag': np.zeros(self.X.shape[0]),
+                         'dL_dKnm': np.zeros((self.X.shape[0], self.Z.shape[0]))})
+        return retd
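For N data points and M inducing points the accumulators above have the shapes below; keeping dL_dpsi2 at N x M x M (instead of the summed M x M) is what allows the missing-data loop to write per-sample rows. A plain restatement with illustrative sizes:

    import numpy as np
    N, M = 8, 3
    full_values = {
        'dL_dKmm': np.zeros((M, M)),      # shared; accumulated whole
        'dL_dpsi0': np.zeros(N),          # per-sample; minibatch-indexed
        'dL_dpsi1': np.zeros((N, M)),
        'dL_dpsi2': np.zeros((N, M, M)),  # kept un-summed over N on purpose
    }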
     def _outer_loop_for_missing_data(self):
         Lm = None
@@ -232,28 +243,36 @@ class SparseGPMiniBatch(SparseGP):
             print(message, end=' ')
 
         for d, ninan in self.stochastics.d:
             if not self.stochastics:
                 print(' '*(len(message)) + '\r', end=' ')
                 message = m_f(d)
                 print(message, end=' ')
-            posterior, log_marginal_likelihood, \
-                grad_dict, current_values, value_indices = self._inner_parameters_changed(
+            psi0ni = self.psi0[ninan]
+            psi1ni = self.psi1[ninan]
+            if self.has_uncertain_inputs():
+                psi2ni = self.psi2[ninan]
+                value_indices = dict(outputs=d, samples=ninan, dL_dpsi0=ninan, dL_dpsi1=ninan, dL_dpsi2=ninan)
+            else:
+                psi2ni = None
+                value_indices = dict(outputs=d, samples=ninan, dL_dKdiag=ninan, dL_dKnm=ninan)
+            posterior, log_marginal_likelihood, grad_dict = self._inner_parameters_changed(
                 self.kern, self.X[ninan],
                 self.Z, self.likelihood,
                 self.Y_normalized[ninan][:, d], self.Y_metadata,
                 Lm, dL_dKmm,
-                subset_indices=dict(outputs=d, samples=ninan))
-            self._inner_take_over_or_update(self.full_values, current_values, value_indices)
-            self._inner_values_update(current_values)
+                psi0=psi0ni, psi1=psi1ni, psi2=psi2ni)
+            # Fill out the full values by adding in the appropriate grad_dict
+            # values
+            self._inner_take_over_or_update(self.full_values, grad_dict, value_indices)
+            self._inner_values_update(grad_dict) # What is this for? -> MRD
+            Lm = posterior.K_chol
+            dL_dKmm = grad_dict['dL_dKmm']
 
             woodbury_inv[:, :, d] = posterior.woodbury_inv[:,:,None]
             woodbury_vector[:, d] = posterior.woodbury_vector
             self._log_marginal_likelihood += log_marginal_likelihood
         if not self.stochastics:
             print('')
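The two cache lines added in the loop (Lm = posterior.K_chol and dL_dKmm = grad_dict['dL_dKmm']) exploit the fact that Kmm = K(Z, Z) depends only on the kernel and Z, which are shared across output blocks, so the Cholesky factor from the first block can be fed back in for all later ones. A self-contained sketch of the idea with toy matrices, not GPy calls:

    import numpy as np
    M, D = 4, 3
    Kmm = 2.0 * np.eye(M) + 0.1              # toy SPD inducing covariance
    Lm = None
    for d in range(D):                       # loop over output blocks
        if Lm is None:
            Lm = np.linalg.cholesky(Kmm)     # factor once, first block only
        # stand-in for the per-block inference consuming the cached factor
        alpha = np.linalg.solve(Lm.T, np.linalg.solve(Lm, np.ones(M)))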
@@ -261,10 +280,10 @@ class SparseGPMiniBatch(SparseGP):
         self.posterior = Posterior(woodbury_inv=woodbury_inv, woodbury_vector=woodbury_vector,
                                    K=posterior._K, mean=None, cov=None, K_chol=posterior.K_chol)
         self._outer_values_update(self.full_values)
+        if self.has_uncertain_inputs():
+            self.kern.return_psi2_n = False
 
     def _outer_loop_without_missing_data(self):
-        self._log_marginal_likelihood = 0
         if self.posterior is None:
             woodbury_inv = np.zeros((self.num_inducing, self.num_inducing, self.output_dim))
             woodbury_vector = np.zeros((self.num_inducing, self.output_dim))
@@ -272,17 +291,16 @@ class SparseGPMiniBatch(SparseGP):
             woodbury_inv = self.posterior._woodbury_inv
             woodbury_vector = self.posterior._woodbury_vector
-        d = self.stochastics.d
-        posterior, log_marginal_likelihood, \
-            grad_dict, self.full_values, _ = self._inner_parameters_changed(
+        d = self.stochastics.d[0][0]
+        posterior, log_marginal_likelihood, grad_dict = self._inner_parameters_changed(
             self.kern, self.X,
             self.Z, self.likelihood,
             self.Y_normalized[:, d], self.Y_metadata)
         self.grad_dict = grad_dict
-        self._log_marginal_likelihood += log_marginal_likelihood
-        self._outer_values_update(self.full_values)
+        self._log_marginal_likelihood = log_marginal_likelihood
+        self._outer_values_update(self.grad_dict)
         woodbury_inv[:, :, d] = posterior.woodbury_inv[:, :, None]
         woodbury_vector[:, d] = posterior.woodbury_vector
@@ -291,10 +309,23 @@ class SparseGPMiniBatch(SparseGP):
                                    K=posterior._K, mean=None, cov=None, K_chol=posterior.K_chol)
 
     def parameters_changed(self):
+        #Compute the psi statistics for N once, but don't sum out N in psi2
+        if self.has_uncertain_inputs():
+            #psi0 = ObsAr(self.kern.psi0(self.Z, self.X))
+            #psi1 = ObsAr(self.kern.psi1(self.Z, self.X))
+            #psi2 = ObsAr(self.kern.psi2(self.Z, self.X))
+            self.psi0 = self.kern.psi0(self.Z, self.X)
+            self.psi1 = self.kern.psi1(self.Z, self.X)
+            self.psi2 = self.kern.psi2n(self.Z, self.X)
+        else:
+            self.psi0 = self.kern.Kdiag(self.X)
+            self.psi1 = self.kern.K(self.X, self.Z)
+            self.psi2 = None
         if self.missing_data:
             self._outer_loop_for_missing_data()
         elif self.stochastics:
             self._outer_loop_without_missing_data()
         else:
-            self.posterior, self._log_marginal_likelihood, self.grad_dict, self.full_values, _ = self._inner_parameters_changed(self.kern, self.X, self.Z, self.likelihood, self.Y_normalized, self.Y_metadata)
-            self._outer_values_update(self.full_values)
+            self.posterior, self._log_marginal_likelihood, self.grad_dict = self._inner_parameters_changed(self.kern, self.X, self.Z, self.likelihood, self.Y_normalized, self.Y_metadata)
+            self._outer_values_update(self.grad_dict)
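Keeping the psi2 statistic per data point (kern.psi2n, N x M x M) rather than pre-summed is what enables the per-sample indexing above and, downstream, per-point weighting of the statistic, e.g. by heteroscedastic noise precisions, before the sum over N. A sketch with illustrative shapes, not GPy API:

    import numpy as np
    N, M = 6, 3
    psi2_n = np.random.rand(N, M, M)      # stands in for kern.psi2n(Z, X)
    beta = np.random.rand(N)              # per-point noise precisions
    psi2_beta = (psi2_n * beta[:, None, None]).sum(axis=0)  # weighted MxM
    # homoscedastic special case: a constant beta factors out of the sum
    assert np.allclose((psi2_n * 0.5).sum(0), 0.5 * psi2_n.sum(0))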