[merge] for spgp minibatch and psi NxMxM
This commit is contained in:
commit
9ddec5bc70
11 changed files with 324 additions and 257 deletions
@@ -64,9 +64,7 @@ class VarDTC(LatentFunctionInference):

def get_VVTfactor(self, Y, prec):
return Y * prec # TODO: cache this, and make it effective

def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None, Lm=None, dL_dKmm=None):
def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None, Lm=None, dL_dKmm=None, psi0=None, psi1=None, psi2=None):

_, output_dim = Y.shape
uncertain_inputs = isinstance(X, VariationalPosterior)

@@ -95,17 +93,28 @@ class VarDTC(LatentFunctionInference):

# The rather complex computations of A, and the psi stats
if uncertain_inputs:
psi0 = kern.psi0(Z, X)
psi1 = kern.psi1(Z, X)
if psi0 is None:
psi0 = kern.psi0(Z, X)
if psi1 is None:
psi1 = kern.psi1(Z, X)
if het_noise:
psi2_beta = np.sum([kern.psi2(Z,X[i:i+1,:]) * beta_i for i,beta_i in enumerate(beta)],0)
if psi2 is None:
assert len(psi2.shape) == 3 # Need to have not summed out N
#FIXME: Need testing
psi2_beta = np.sum([psi2[X[i:i+1,:], :, :] * beta_i for i,beta_i in enumerate(beta)],0)
else:
psi2_beta = np.sum([kern.psi2(Z,X[i:i+1,:]) * beta_i for i,beta_i in enumerate(beta)],0)
else:
psi2_beta = kern.psi2(Z,X) * beta
if psi2 is None:
psi2 = kern.psi2(Z,X)
psi2_beta = psi2 * beta
LmInv = dtrtri(Lm)
A = LmInv.dot(psi2_beta.dot(LmInv.T))
else:
psi0 = kern.Kdiag(X)
psi1 = kern.K(X, Z)
if psi0 is None:
psi0 = kern.Kdiag(X)
if psi1 is None:
psi1 = kern.K(X, Z)
if het_noise:
tmp = psi1 * (np.sqrt(beta))
else:
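The change above threads optionally precomputed psi statistics through inference, so an outer minibatch loop can compute them once per iteration. A minimal, self-contained sketch of the same compute-if-absent convention (the callable below is a hypothetical stand-in for GPy's kernel methods, not the real API):

    import numpy as np

    def inference(compute_psi2n, psi2=None):
        # Reuse the caller's NxMxM psi2 if it was precomputed; otherwise
        # fall back to the (expensive) kernel computation.
        if psi2 is None:
            psi2 = compute_psi2n()
        assert psi2.ndim == 3, "N must not have been summed out"
        return psi2.sum(axis=0)  # the MxM statistic used downstream

    N, M = 5, 3
    precomputed = np.random.rand(N, M, M)  # computed once in the outer loop
    A = inference(lambda: precomputed, psi2=precomputed)  # no recomputation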
@@ -5,7 +5,7 @@ class StochasticStorage(object):

'''
This is a container for holding the stochastic parameters,
such as subset indices or step length and so on.

self.d has to be a list of lists:
[dimension indices, nan indices for those dimensions]
so that the minibatches can be used as efficiently as possible.

@@ -38,16 +38,17 @@ class SparseGPMissing(StochasticStorage):

import numpy as np
self.Y = model.Y_normalized
bdict = {}
#For N > 1000 array2string default crops
opt = np.get_printoptions()
np.set_printoptions(threshold='nan')
for d in range(self.Y.shape[1]):
inan = np.isnan(self.Y[:, d])
arr_str = np.array2string(inan,
np.inf, 0,
True, '',
formatter={'bool':lambda x: '1' if x else '0'})
inan = np.isnan(self.Y)[:, d]
arr_str = np.array2string(inan, np.inf, 0, True, '', formatter={'bool':lambda x: '1' if x else '0'})
try:
bdict[arr_str][0].append(d)
except:
bdict[arr_str] = [[d], ~inan]
np.set_printoptions(**opt)
self.d = bdict.values()

class SparseGPStochastics(StochasticStorage):

@@ -55,32 +56,36 @@ class SparseGPStochastics(StochasticStorage):

For the sparse gp we need to store the dimension we are in,
and the indices corresponding to those
"""
def __init__(self, model, batchsize=1):
def __init__(self, model, batchsize=1, missing_data=True):
self.batchsize = batchsize
self.output_dim = model.Y.shape[1]
self.Y = model.Y_normalized
self.missing_data = missing_data
self.reset()
self.do_stochastics()

def do_stochastics(self):
import numpy as np
if self.batchsize == 1:
self.current_dim = (self.current_dim+1)%self.output_dim
self.d = [[[self.current_dim], np.isnan(self.Y[:, self.d])]]
self.d = [[[self.current_dim], np.isnan(self.Y[:, self.current_dim]) if self.missing_data else None]]
else:
import numpy as np
self.d = np.random.choice(self.output_dim, size=self.batchsize, replace=False)
bdict = {}
for d in self.d:
inan = np.isnan(self.Y[:, d])
arr_str = int(np.array2string(inan,
np.inf, 0,
True, '',
formatter={'bool':lambda x: '1' if x else '0'}), 2)
try:
bdict[arr_str][0].append(d)
except:
bdict[arr_str] = [[d], ~inan]
self.d = bdict.values()
if self.missing_data:
opt = np.get_printoptions()
np.set_printoptions(threshold='nan')
for d in self.d:
inan = np.isnan(self.Y[:, d])
arr_str = np.array2string(inan,np.inf, 0,True, '',formatter={'bool':lambda x: '1' if x else '0'})
try:
bdict[arr_str][0].append(d)
except:
bdict[arr_str] = [[d], ~inan]
np.set_printoptions(**opt)
self.d = bdict.values()
else:
self.d = [[self.d, None]]

def reset(self):
self.current_dim = -1
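The bdict bookkeeping above groups output dimensions that share the same missing-data pattern, so each group is processed once per pass. A rough equivalent that keys on the raw boolean bytes instead of array2string (my substitution, not GPy's code; it also sidesteps modern NumPy's rejection of threshold='nan'):

    import numpy as np

    Y = np.array([[1.,  np.nan, 2., np.nan],
                  [3.,  4.,     5., 6.],
                  [7.,  np.nan, 8., np.nan]])

    bdict = {}
    for d in range(Y.shape[1]):
        inan = np.isnan(Y[:, d])
        key = inan.tobytes()            # one key per distinct NaN pattern
        if key in bdict:
            bdict[key][0].append(d)     # same pattern: reuse the group
        else:
            bdict[key] = [[d], ~inan]   # [dimension indices, non-nan mask]

    groups = list(bdict.values())       # here dims 0/2 and dims 1/3 form two groups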
@@ -58,24 +58,10 @@ class Kern(Parameterized):

self._sliced_X = 0
self.useGPU = self._support_GPU and useGPU
self._return_psi2_n_flag = ObsAr(np.zeros(1)).astype(bool)

from .psi_comp import PSICOMP_GH
self.psicomp = PSICOMP_GH()

@property
def return_psi2_n(self):
"""
Flag whether to pass back psi2 as NxMxM or MxM, by summing out N.
"""
return self._return_psi2_n_flag[0]
@return_psi2_n.setter
def return_psi2_n(self, val):
def visit(self):
if isinstance(self, Kern):
self._return_psi2_n_flag[0]=val
self.traverse(visit)

@Cache_this(limit=20)
def _slice_X(self, X):
return X[:, self.active_dims]

@@ -97,7 +83,9 @@ class Kern(Parameterized):

def psi1(self, Z, variational_posterior):
return self.psicomp.psicomputations(self, Z, variational_posterior)[1]
def psi2(self, Z, variational_posterior):
return self.psicomp.psicomputations(self, Z, variational_posterior)[2]
return self.psicomp.psicomputations(self, Z, variational_posterior, return_psi2_n=False)[2]
def psi2n(self, Z, variational_posterior):
return self.psicomp.psicomputations(self, Z, variational_posterior, return_psi2_n=True)[2]
def gradients_X(self, dL_dK, X, X2):
raise NotImplementedError
def gradients_XX(self, dL_dK, X, X2):

@@ -115,7 +103,8 @@ class Kern(Parameterized):

"""Set the gradients of all parameters when doing full (N) inference."""
raise NotImplementedError

def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior,
psi0=None, psi1=None, psi2=None):
"""
Set the gradients of all parameters when doing inference with
uncertain inputs, using expectations of the kernel.

@@ -126,22 +115,27 @@ class Kern(Parameterized):

dL_dpsi1 * dpsi1_d{theta_i} +
dL_dpsi2 * dpsi2_d{theta_i}
"""
dtheta = self.psicomp.psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior)[0]
dtheta = self.psicomp.psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior,
psi0=psi0, psi1=psi1, psi2=psi2)[0]
self.gradient[:] = dtheta

def gradients_Z_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
def gradients_Z_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior,
psi0=None, psi1=None, psi2=None):
"""
Returns the derivative of the objective wrt Z, using the chain rule
through the expectation variables.
"""
return self.psicomp.psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior)[1]
return self.psicomp.psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior,
psi0=psi0, psi1=psi1, psi2=psi2)[1]

def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior,
psi0=None, psi1=None, psi2=None, Lpsi0=None, Lpsi1=None, Lpsi2=None):
"""
Compute the gradients wrt the parameters of the variational
distribution q(X), chain-ruling via the expectations of the kernel
"""
return self.psicomp.psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior)[2:]
return self.psicomp.psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior,
psi0=psi0, psi1=psi1, psi2=psi2)[2:]

def plot(self, x=None, fignum=None, ax=None, title=None, plot_limits=None, resolution=None, **mpl_kwargs):
"""
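The new psi2n accessor only changes whether the per-point statistic is summed over N: psi2 (MxM) is the sum over n of psi2n (NxMxM). A two-line shape check of that relationship:

    import numpy as np

    N, M = 4, 3
    psi2n = np.random.rand(N, M, M)   # per-data-point statistic, NxMxM
    psi2 = psi2n.sum(axis=0)          # summed statistic, MxM
    assert psi2.shape == (M, M) and psi2n.shape == (N, M, M)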
@@ -137,25 +137,31 @@ def _slice_psi(f):

def _slice_update_gradients_expectations(f):
@wraps(f)
def wrap(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
def wrap(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior,
psi0=None, psi1=None, psi2=None, Lpsi0=None, Lpsi1=None, Lpsi2=None):
with _Slice_wrap(self, Z, variational_posterior) as s:
ret = f(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, s.X, s.X2)
ret = f(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, s.X, s.X2,
psi0=psi0, psi1=psi1, psi2=psi2, Lpsi0=Lpsi0, Lpsi1=Lpsi1, Lpsi2=Lpsi2)
return ret
return wrap

def _slice_gradients_Z_expectations(f):
@wraps(f)
def wrap(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
def wrap(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior,
psi0=None, psi1=None, psi2=None, Lpsi0=None, Lpsi1=None, Lpsi2=None):
with _Slice_wrap(self, Z, variational_posterior) as s:
ret = s.handle_return_array(f(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, s.X, s.X2))
ret = s.handle_return_array(f(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, s.X, s.X2,
psi0=psi0, psi1=psi1, psi2=psi2, Lpsi0=Lpsi0, Lpsi1=Lpsi1, Lpsi2=Lpsi2))
return ret
return wrap

def _slice_gradients_qX_expectations(f):
@wraps(f)
def wrap(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
def wrap(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior,
psi0=None, psi1=None, psi2=None, Lpsi0=None, Lpsi1=None, Lpsi2=None):
with _Slice_wrap(self, variational_posterior, Z) as s:
ret = list(f(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, s.X2, s.X))
ret = list(f(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, s.X2, s.X,
psi0=psi0, psi1=psi1, psi2=psi2, Lpsi0=Lpsi0, Lpsi1=Lpsi1, Lpsi2=Lpsi2))
r2 = ret[:2]
ret[0] = s.handle_return_array(r2[0])
ret[1] = s.handle_return_array(r2[1])
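Each slicing wrapper above gains the same optional keyword arguments and forwards them untouched to the wrapped method. A self-contained sketch of that pattern, with the slicing context manager replaced by a trivial stand-in:

    from functools import wraps

    def _slice_update_gradients_expectations(f):
        @wraps(f)
        def wrap(self, dL_dpsi0, Z, psi0=None, psi1=None, psi2=None):
            # The real code slices inputs here; the precomputed statistics
            # are passed through unchanged either way.
            return f(self, dL_dpsi0, Z, psi0=psi0, psi1=psi1, psi2=psi2)
        return wrap

    class Demo:
        @_slice_update_gradients_expectations
        def update(self, dL_dpsi0, Z, psi0=None, psi1=None, psi2=None):
            return psi0 is not None

    assert Demo().update(0.0, None, psi0=1.0) is True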
@@ -12,18 +12,22 @@ from .gaussherm import PSICOMP_GH

class PSICOMP_RBF(Pickleable):
@Cache_this(limit=10, ignore_args=(0,))
def psicomputations(self, variance, lengthscale, Z, variational_posterior):
def psicomputations(self, variance, lengthscale, Z, variational_posterior, return_psi2_n=False):
if isinstance(variational_posterior, variational.NormalPosterior):
return rbf_psi_comp.psicomputations(variance, lengthscale, Z, variational_posterior)
return rbf_psi_comp.psicomputations(variance, lengthscale, Z, variational_posterior, return_psi2_n=return_psi2_n)
elif isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
if return_psi2_n:
raise NotImplementedError('However this function seems to return it by default')
return ssrbf_psi_comp.psicomputations(variance, lengthscale, Z, variational_posterior)
else:
raise ValueError("unknown distribution received for psi-statistics")

@Cache_this(limit=10, ignore_args=(0,1,2,3))
def psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior):
def psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior,
psi0=None, psi1=None, psi2=None, Lpsi0=None, Lpsi1=None, Lpsi2=None):
if isinstance(variational_posterior, variational.NormalPosterior):
return rbf_psi_comp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior)
return rbf_psi_comp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior,
psi0=psi0, psi1=psi1, psi2=psi2, Lpsi0=Lpsi0, Lpsi1=Lpsi1, Lpsi2=Lpsi2)
elif isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
return ssrbf_psi_comp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior)
else:

@@ -35,7 +39,9 @@ class PSICOMP_RBF(Pickleable):

class PSICOMP_Linear(Pickleable):

@Cache_this(limit=10, ignore_args=(0,))
def psicomputations(self, variance, Z, variational_posterior):
def psicomputations(self, variance, Z, variational_posterior, return_psi2_n=False):
if return_psi2_n:
raise NotImplementedError
if isinstance(variational_posterior, variational.NormalPosterior):
return linear_psi_comp.psicomputations(variance, Z, variational_posterior)
elif isinstance(variational_posterior, variational.SpikeAndSlabPosterior):

@@ -44,8 +50,9 @@ class PSICOMP_Linear(Pickleable):

raise ValueError("unknown distribution received for psi-statistics")

@Cache_this(limit=10, ignore_args=(0,1,2,3))
def psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, Z, variational_posterior):
def psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, Z, variational_posterior, psi0=None, psi1=None, psi2=None):
if isinstance(variational_posterior, variational.NormalPosterior):
#Should pass psi in
return linear_psi_comp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, Z, variational_posterior)
elif isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
return sslinear_psi_comp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, Z, variational_posterior)

@@ -53,4 +60,4 @@ class PSICOMP_Linear(Pickleable):

raise ValueError("unknown distribution received for psi-statistics")

def _setup_observers(self):
pass
pass
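The PSICOMP classes dispatch on the type of the variational posterior and refuse combinations they cannot handle. A stripped-down sketch of that dispatch; the posterior classes here are stand-ins, not GPy's:

    class NormalPosterior: pass
    class SpikeAndSlabPosterior: pass

    def psicomputations(posterior, return_psi2_n=False):
        if isinstance(posterior, NormalPosterior):
            return "rbf path", return_psi2_n           # supports both shapes
        elif isinstance(posterior, SpikeAndSlabPosterior):
            if return_psi2_n:
                raise NotImplementedError("NxMxM not wired up for ss-rbf")
            return "ss-rbf path", False
        else:
            raise ValueError("unknown distribution received for psi-statistics")

    assert psicomputations(NormalPosterior(), True) == ("rbf path", True)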
@@ -8,7 +8,7 @@ The package for the Psi statistics computation of the linear kernel for Bayesian

import numpy as np
from ....util.linalg import tdot

def psicomputations(variance, Z, variational_posterior):
def psicomputations(variance, Z, variational_posterior, return_psi2_n=False):
"""
Compute psi-statistics for ss-linear kernel
"""

@@ -22,7 +22,10 @@ def psicomputations(variance, Z, variational_posterior):

psi0 = (variance*(np.square(mu)+S)).sum(axis=1)
psi1 = np.dot(mu,(variance*Z).T)
psi2 = np.dot(S.sum(axis=0)*np.square(variance)*Z,Z.T)+ tdot(psi1.T)
if return_psi2_n:
psi2 = np.dot(S.sum(axis=0)*np.square(variance)*Z,Z.T)+ tdot(psi1.T)
else:
raise NotImplementedError

return psi0, psi1, psi2

@@ -40,7 +43,7 @@ def psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, Z, variati

dL_dmu += 2.*dL_dpsi0_var*mu+np.dot(dL_dpsi1,Z)*variance
dL_dS += dL_dpsi0_var
dL_dZ += dL_dpsi1_mu*variance

return dL_dvar, dL_dZ, dL_dmu, dL_dS

def _psi2computations(dL_dpsi2, variance, Z, mu, S):

@@ -56,7 +59,7 @@ def _psi2computations(dL_dpsi2, variance, Z, mu, S):

# _psi2_dZ MxQ
# _psi2_dmu NxQ
# _psi2_dS NxQ

variance2 = np.square(variance)
common_sum = np.dot(mu,(variance*Z).T)
Z_expect = (np.dot(dL_dpsi2,Z)*Z).sum(axis=0)

@@ -66,12 +69,12 @@ def _psi2computations(dL_dpsi2, variance, Z, mu, S):

Z1_expect = np.dot(dL_dpsi2T,Z)

dL_dvar = 2.*S.sum(axis=0)*variance*Z_expect+(common_expect*mu).sum(axis=0)

dL_dmu = common_expect*variance

dL_dS = np.empty(S.shape)
dL_dS[:] = Z_expect*variance2

dL_dZ = variance2*S.sum(axis=0)*Z1_expect+np.dot(Z2_expect.T,variance*mu)

return dL_dvar, dL_dmu, dL_dS, dL_dZ
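For the linear kernel the summed psi2 decomposes exactly as the line above: an MxM term from the input variances plus psi1-transpose times psi1 (GPy's tdot(A) is A.dot(A.T)). A small numeric check of that identity, assuming a q(X) with mean mu and variance S and a per-dimension variance parameter:

    import numpy as np

    N, M, Q = 4, 3, 2
    rng = np.random.default_rng(0)
    variance = rng.random(Q)            # ARD variance, one per input dimension
    Z = rng.random((M, Q))
    mu, S = rng.random((N, Q)), rng.random((N, Q))

    psi1 = mu.dot((variance * Z).T)                                   # NxM
    psi2 = (S.sum(axis=0) * np.square(variance) * Z).dot(Z.T) \
           + psi1.T.dot(psi1)                                         # MxM; tdot(psi1.T)
    assert psi2.shape == (M, M) and np.allclose(psi2, psi2.T)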
@@ -5,7 +5,7 @@ The module for psi-statistics for RBF kernel

import numpy as np
from GPy.util.caching import Cacher

def psicomputations(variance, lengthscale, Z, variational_posterior):
def psicomputations(variance, lengthscale, Z, variational_posterior, return_psi2_n=False):
"""
Z - MxQ
mu - NxQ

@@ -21,7 +21,9 @@ def psicomputations(variance, lengthscale, Z, variational_posterior):

psi0 = np.empty(mu.shape[0])
psi0[:] = variance
psi1 = _psi1computations(variance, lengthscale, Z, mu, S)
psi2 = _psi2computations(variance, lengthscale, Z, mu, S).sum(axis=0)
psi2 = _psi2computations(variance, lengthscale, Z, mu, S)
if not return_psi2_n:
psi2 = psi2.sum(axis=0)
return psi0, psi1, psi2

def __psi1computations(variance, lengthscale, Z, mu, S):

@@ -66,11 +68,12 @@ def __psi2computations(variance, lengthscale, Z, mu, S):

_psi2 = variance*variance*np.exp(_psi2_logdenom[:,None,None]+_psi2_exp1[None,:,:]+_psi2_exp2)
return _psi2

def psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior):
def psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior,
psi0=None, psi1=None, psi2=None, Lpsi0=None, Lpsi1=None, Lpsi2=None):
ARD = (len(lengthscale)!=1)

dvar_psi1, dl_psi1, dZ_psi1, dmu_psi1, dS_psi1 = _psi1compDer(dL_dpsi1, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance)
dvar_psi2, dl_psi2, dZ_psi2, dmu_psi2, dS_psi2 = _psi2compDer(dL_dpsi2, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance)
dvar_psi1, dl_psi1, dZ_psi1, dmu_psi1, dS_psi1 = _psi1compDer(dL_dpsi1, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance, psi1=psi1, Lpsi1=Lpsi1)
dvar_psi2, dl_psi2, dZ_psi2, dmu_psi2, dS_psi2 = _psi2compDer(dL_dpsi2, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance, psi2=psi2, Lpsi2=Lpsi2)

dL_dvar = np.sum(dL_dpsi0) + dvar_psi1 + dvar_psi2

@@ -84,7 +87,7 @@ def psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscal

return dL_dvar, dL_dlengscale, dL_dZ, dL_dmu, dL_dS

def _psi1compDer(dL_dpsi1, variance, lengthscale, Z, mu, S):
def __psi1compDer(dL_dpsi1, variance, lengthscale, Z, mu, S, psi1=None, Lpsi1=None):
"""
dL_dpsi1 - NxM
Z - MxQ

@@ -103,8 +106,10 @@ def _psi1compDer(dL_dpsi1, variance, lengthscale, Z, mu, S):

lengthscale2 = np.square(lengthscale)

_psi1 = _psi1computations(variance, lengthscale, Z, mu, S)
Lpsi1 = dL_dpsi1*_psi1
if psi1 is None:
psi1 = _psi1computations(variance, lengthscale, Z, mu, S)
if Lpsi1 is None:
Lpsi1 = dL_dpsi1*psi1
Zmu = Z[None,:,:]-mu[:,None,:] # NxMxQ
denom = 1./(S+lengthscale2)
Zmu2_denom = np.square(Zmu)*denom[:,None,:] #NxMxQ

@@ -116,7 +121,7 @@ def _psi1compDer(dL_dpsi1, variance, lengthscale, Z, mu, S):

return _dL_dvar, _dL_dl, _dL_dZ, _dL_dmu, _dL_dS

def _psi2compDer(dL_dpsi2, variance, lengthscale, Z, mu, S):
def __psi2compDer(dL_dpsi2, variance, lengthscale, Z, mu, S, psi2=None, Lpsi2=None):
"""
Z - MxQ
mu - NxQ

@@ -137,8 +142,10 @@ def _psi2compDer(dL_dpsi2, variance, lengthscale, Z, mu, S):

denom = 1./(2*S+lengthscale2)
denom2 = np.square(denom)

_psi2 = _psi2computations(variance, lengthscale, Z, mu, S) # NxMxM
Lpsi2 = dL_dpsi2*_psi2 # dL_dpsi2 is MxM, using broadcast to multiply N out
if psi2 is None:
psi2 = _psi2computations(variance, lengthscale, Z, mu, S) # NxMxM
if Lpsi2 is None:
Lpsi2 = dL_dpsi2*psi2 # dL_dpsi2 is MxM, using broadcast to multiply N out
Lpsi2sum = np.einsum('nmo->n',Lpsi2) #N
Lpsi2Z = np.einsum('nmo,oq->nq',Lpsi2,Z) #NxQ
Lpsi2Z2 = np.einsum('nmo,oq,oq->nq',Lpsi2,Z,Z) #NxQ

@@ -149,8 +156,14 @@ def _psi2compDer(dL_dpsi2, variance, lengthscale, Z, mu, S):

_dL_dvar = Lpsi2sum.sum()*2/variance
_dL_dmu = (-2*denom) * (mu*Lpsi2sum[:,None]-Lpsi2Zhat)
_dL_dS = (2*np.square(denom))*(np.square(mu)*Lpsi2sum[:,None]-2*mu*Lpsi2Zhat+Lpsi2Zhat2) - denom*Lpsi2sum[:,None]
_dL_dZ = -np.einsum('nmo,oq->oq',Lpsi2,Z)/lengthscale2+np.einsum('nmo,oq->mq',Lpsi2,Z)/lengthscale2+ \
2*np.einsum('nmo,nq,nq->mq',Lpsi2,mu,denom) - np.einsum('nmo,nq,mq->mq',Lpsi2,denom,Z) - np.einsum('nmo,oq,nq->mq',Lpsi2,Z,denom)
_dL_dZ1 = -np.einsum('nmo,oq->oq',Lpsi2,Z)/lengthscale2
_dL_dZ2 = np.einsum('nmo,oq->mq',Lpsi2,Z)/lengthscale2
_dL_dZ3 = 2*np.einsum('nmo,nq,nq->mq',Lpsi2,mu,denom)
_dL_dZ4 = - np.einsum('nmo,nq,mq->mq',Lpsi2,denom,Z)
_dL_dZ5 = - np.einsum('nmo,oq,nq->mq',Lpsi2,Z,denom)
_dL_dZ = _dL_dZ1 + _dL_dZ2 + _dL_dZ3 + _dL_dZ4 + _dL_dZ5
#_dL_dZ = -np.einsum('nmo,oq->oq',Lpsi2,Z)/lengthscale2+np.einsum('nmo,oq->mq',Lpsi2,Z)/lengthscale2+ \
#2*np.einsum('nmo,nq,nq->mq',Lpsi2,mu,denom) - np.einsum('nmo,nq,mq->mq',Lpsi2,denom,Z) - np.einsum('nmo,oq,nq->mq',Lpsi2,Z,denom)
_dL_dl = 2*lengthscale* ((S/lengthscale2*denom+np.square(mu*denom))*Lpsi2sum[:,None]+(Lpsi2Z2-Lpsi2Z2p)/(2*np.square(lengthscale2))-
(2*mu*denom2)*Lpsi2Zhat+denom2*Lpsi2Zhat2).sum(axis=0)

@@ -158,3 +171,5 @@ def _psi2compDer(dL_dpsi2, variance, lengthscale, Z, mu, S):

_psi1computations = Cacher(__psi1computations, limit=5)
_psi2computations = Cacher(__psi2computations, limit=5)
_psi1compDer = Cacher(__psi1compDer, limit=5)
_psi2compDer = Cacher(__psi2compDer, limit=5)
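The private double-underscore functions are wrapped in GPy's Cacher so repeated calls with unchanged arguments are free, and the new psi/Lpsi keyword arguments let callers bypass even that. A tiny memoizing wrapper in the same spirit (an illustration only, not GPy's Cacher):

    import numpy as np

    class Cacher:
        """Remember up to `limit` results, keyed on the argument values."""
        def __init__(self, operation, limit=5):
            self.operation, self.limit, self.cache = operation, limit, {}
        def __call__(self, *args):
            key = tuple(a.tobytes() if isinstance(a, np.ndarray) else a for a in args)
            if key not in self.cache:
                if len(self.cache) >= self.limit:
                    self.cache.pop(next(iter(self.cache)))  # evict the oldest entry
                self.cache[key] = self.operation(*args)
            return self.cache[key]

    def __psi1computations(variance, Z):
        return variance * Z.sum()       # stand-in for the real computation

    _psi1computations = Cacher(__psi1computations, limit=5)
    Z = np.ones(3)
    assert _psi1computations(2.0, Z) == _psi1computations(2.0, Z)  # second call is cached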
@@ -9,7 +9,7 @@ import numpy as np

try:
from scipy import weave

def _psicomputations(variance, lengthscale, Z, variational_posterior):
"""
Z - MxQ

@@ -23,7 +23,7 @@ try:

mu = variational_posterior.mean
S = variational_posterior.variance
gamma = variational_posterior.binary_prob

N,M,Q = mu.shape[0],Z.shape[0],mu.shape[1]
l2 = np.square(lengthscale)
log_denom1 = np.log(S/l2+1)

@@ -35,13 +35,13 @@ try:

psi0[:] = variance
psi1 = np.empty((N,M))
psi2n = np.empty((N,M,M))

from ....util.misc import param_to_array
S = param_to_array(S)
mu = param_to_array(mu)
gamma = param_to_array(gamma)
Z = param_to_array(Z)

support_code = """
#include <math.h>
"""
@@ -56,11 +56,11 @@ try:

double lq = l2(q);
double Zm1q = Z(m1,q);
double Zm2q = Z(m2,q);

if(m2==0) {
// Compute Psi_1
double muZ = mu(n,q)-Z(m1,q);

double psi1_exp1 = log_gamma(n,q) - (muZ*muZ/(Snq+lq) +log_denom1(n,q))/2.;
double psi1_exp2 = log_gamma1(n,q) -Zm1q*Zm1q/(2.*lq);
log_psi1 += (psi1_exp1>psi1_exp2)?psi1_exp1+log1p(exp(psi1_exp2-psi1_exp1)):psi1_exp2+log1p(exp(psi1_exp1-psi1_exp2));

@@ -69,10 +69,10 @@ try:

double muZhat = mu(n,q) - (Zm1q+Zm2q)/2.;
double Z2 = Zm1q*Zm1q+ Zm2q*Zm2q;
double dZ = Zm1q - Zm2q;

double psi2_exp1 = dZ*dZ/(-4.*lq)-muZhat*muZhat/(2.*Snq+lq) - log_denom2(n,q)/2. + log_gamma(n,q);
double psi2_exp2 = log_gamma1(n,q) - Z2/(2.*lq);
log_psi2_n += (psi2_exp1>psi2_exp2)?psi2_exp1+log1p(exp(psi2_exp2-psi2_exp1)):psi2_exp2+log1p(exp(psi2_exp1-psi2_exp2));
log_psi2_n += (psi2_exp1>psi2_exp2)?psi2_exp1+log1p(exp(psi2_exp2-psi2_exp1)):psi2_exp2+log1p(exp(psi2_exp1-psi2_exp2));
}
double exp_psi2_n = exp(log_psi2_n);
psi2n(n,m1,m2) = variance*variance*exp_psi2_n;

@@ -83,18 +83,18 @@ try:

}
"""
weave.inline(code, support_code=support_code, arg_names=['psi1','psi2n','N','M','Q','variance','l2','Z','mu','S','gamma','log_denom1','log_denom2','log_gamma','log_gamma1'], type_converters=weave.converters.blitz)

psi2 = psi2n.sum(axis=0)
return psi0,psi1,psi2,psi2n

from GPy.util.caching import Cacher
psicomputations = Cacher(_psicomputations, limit=1)

def psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior):
ARD = (len(lengthscale)!=1)

_,psi1,_,psi2n = psicomputations(variance, lengthscale, Z, variational_posterior)

mu = variational_posterior.mean
S = variational_posterior.variance
gamma = variational_posterior.binary_prob
@@ -105,7 +105,7 @@ try:

log_gamma = np.log(gamma)
log_gamma1 = np.log(1.-gamma)
variance = float(variance)

dvar = np.zeros(1)
dmu = np.zeros((N,Q))
dS = np.zeros((N,Q))

@@ -113,13 +113,13 @@ try:

dl = np.zeros(Q)
dZ = np.zeros((M,Q))
dvar += np.sum(dL_dpsi0)

from ....util.misc import param_to_array
S = param_to_array(S)
mu = param_to_array(mu)
gamma = param_to_array(gamma)
Z = param_to_array(Z)

support_code = """
#include <math.h>
"""

@@ -136,16 +136,16 @@ try:

double Zm2q = Z(m2,q);
double gnq = gamma(n,q);
double mu_nq = mu(n,q);

if(m2==0) {
// Compute Psi_1
// Compute Psi_1
double lpsi1 = psi1(n,m1)*dL_dpsi1(n,m1);
if(q==0) {dvar(0) += lpsi1/variance;}

double Zmu = Zm1q - mu_nq;
double denom = Snq+lq;
double Zmu2_denom = Zmu*Zmu/denom;

double exp1 = log_gamma(n,q)-(Zmu*Zmu/(Snq+lq)+log_denom1(n,q))/(2.);
double exp2 = log_gamma1(n,q)-Zm1q*Zm1q/(2.*lq);
double d_exp1,d_exp2;

@@ -157,7 +157,7 @@ try:

d_exp2 = 1.;
}
double exp_sum = d_exp1+d_exp2;

dmu(n,q) += lpsi1*Zmu*d_exp1/(denom*exp_sum);
dS(n,q) += lpsi1*(Zmu2_denom-1.)*d_exp1/(denom*exp_sum)/2.;
dgamma(n,q) += lpsi1*(d_exp1/gnq-d_exp2/(1.-gnq))/exp_sum;

@@ -167,13 +167,13 @@ try:

// Compute Psi_2
double lpsi2 = psi2n(n,m1,m2)*dL_dpsi2(m1,m2);
if(q==0) {dvar(0) += lpsi2*2/variance;}

double dZm1m2 = Zm1q - Zm2q;
double Z2 = Zm1q*Zm1q+Zm2q*Zm2q;
double muZhat = mu_nq - (Zm1q + Zm2q)/2.;
double denom = 2.*Snq+lq;
double muZhat2_denom = muZhat*muZhat/denom;

double exp1 = dZm1m2*dZm1m2/(-4.*lq)-muZhat*muZhat/(2.*Snq+lq) - log_denom2(n,q)/2. + log_gamma(n,q);
double exp2 = log_gamma1(n,q) - Z2/(2.*lq);
double d_exp1,d_exp2;

@@ -185,23 +185,23 @@ try:

d_exp2 = 1.;
}
double exp_sum = d_exp1+d_exp2;

dmu(n,q) += -2.*lpsi2*muZhat/denom*d_exp1/exp_sum;
dS(n,q) += lpsi2*(2.*muZhat2_denom-1.)/denom*d_exp1/exp_sum;
dgamma(n,q) += lpsi2*(d_exp1/gnq-d_exp2/(1.-gnq))/exp_sum;
dl(q) += lpsi2*(((Snq/lq+muZhat2_denom)/denom+dZm1m2*dZm1m2/(4.*lq*lq))*d_exp1+Z2/(2.*lq*lq)*d_exp2)/exp_sum;
dZ(m1,q) += 2.*lpsi2*((muZhat/denom-dZm1m2/(2*lq))*d_exp1-Zm1q/lq*d_exp2)/exp_sum;
dZ(m1,q) += 2.*lpsi2*((muZhat/denom-dZm1m2/(2*lq))*d_exp1-Zm1q/lq*d_exp2)/exp_sum;
}
}
}
}
"""
weave.inline(code, support_code=support_code, arg_names=['dL_dpsi1','dL_dpsi2','psi1','psi2n','N','M','Q','variance','l2','Z','mu','S','gamma','log_denom1','log_denom2','log_gamma','log_gamma1','dvar','dl','dmu','dS','dgamma','dZ'], type_converters=weave.converters.blitz)

dl *= 2.*lengthscale
if not ARD:
dl = dl.sum()

return dvar, dl, dZ, dmu, dS, dgamma

except:
@@ -219,13 +219,13 @@ except:

mu = variational_posterior.mean
S = variational_posterior.variance
gamma = variational_posterior.binary_prob

psi0 = np.empty(mu.shape[0])
psi0[:] = variance
psi1 = _psi1computations(variance, lengthscale, Z, mu, S, gamma)
psi2 = _psi2computations(variance, lengthscale, Z, mu, S, gamma)
return psi0, psi1, psi2

def _psi1computations(variance, lengthscale, Z, mu, S, gamma):
"""
Z - MxQ

@@ -236,9 +236,9 @@ except:

# here are the "statistics" for psi1
# Produced intermediate results:
# _psi1 NxM

lengthscale2 = np.square(lengthscale)

# psi1
_psi1_denom = S[:, None, :] / lengthscale2 + 1. # Nx1xQ
_psi1_denom_sqrt = np.sqrt(_psi1_denom) #Nx1xQ

@@ -251,9 +251,9 @@ except:

_psi1_exponent = _psi1_exponent_max+np.log(np.exp(_psi1_exponent1-_psi1_exponent_max) + np.exp(_psi1_exponent2-_psi1_exponent_max)) #NxMxQ
_psi1_exp_sum = _psi1_exponent.sum(axis=-1) #NxM
_psi1 = variance * np.exp(_psi1_exp_sum) # NxM

return _psi1

def _psi2computations(variance, lengthscale, Z, mu, S, gamma):
"""
Z - MxQ

@@ -264,14 +264,14 @@ except:

# here are the "statistics" for psi2
# Produced intermediate results:
# _psi2 MxM

lengthscale2 = np.square(lengthscale)

_psi2_Zhat = 0.5 * (Z[:, None, :] + Z[None, :, :]) # M,M,Q
_psi2_Zdist = 0.5 * (Z[:, None, :] - Z[None, :, :]) # M,M,Q
_psi2_Zdist_sq = np.square(_psi2_Zdist / lengthscale) # M,M,Q
_psi2_Z_sq_sum = (np.square(Z[:,None,:])+np.square(Z[None,:,:]))/lengthscale2 # MxMxQ

# psi2
_psi2_denom = 2.*S[:, None, None, :] / lengthscale2 + 1. # Nx1x1xQ
_psi2_denom_sqrt = np.sqrt(_psi2_denom)

@@ -284,28 +284,28 @@ except:

_psi2_exponent = _psi2_exponent_max+np.log(np.exp(_psi2_exponent1-_psi2_exponent_max) + np.exp(_psi2_exponent2-_psi2_exponent_max))
_psi2_exp_sum = _psi2_exponent.sum(axis=-1) #NxM
_psi2 = variance*variance * (np.exp(_psi2_exp_sum).sum(axis=0)) # MxM

return _psi2

def psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior):
ARD = (len(lengthscale)!=1)

dvar_psi1, dl_psi1, dZ_psi1, dmu_psi1, dS_psi1, dgamma_psi1 = _psi1compDer(dL_dpsi1, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
dvar_psi2, dl_psi2, dZ_psi2, dmu_psi2, dS_psi2, dgamma_psi2 = _psi2compDer(dL_dpsi2, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)

dL_dvar = np.sum(dL_dpsi0) + dvar_psi1 + dvar_psi2

dL_dlengscale = dl_psi1 + dl_psi2
if not ARD:
dL_dlengscale = dL_dlengscale.sum()

dL_dgamma = dgamma_psi1 + dgamma_psi2
dL_dmu = dmu_psi1 + dmu_psi2
dL_dS = dS_psi1 + dS_psi2
dL_dZ = dZ_psi1 + dZ_psi2

return dL_dvar, dL_dlengscale, dL_dZ, dL_dmu, dL_dS, dL_dgamma

def _psi1compDer(dL_dpsi1, variance, lengthscale, Z, mu, S, gamma):
"""
dL_dpsi1 - NxM

@@ -322,9 +322,9 @@ except:

# _dL_dgamma NxQ
# _dL_dmu NxQ
# _dL_dS NxQ

lengthscale2 = np.square(lengthscale)

# psi1
_psi1_denom = S / lengthscale2 + 1. # NxQ
_psi1_denom_sqrt = np.sqrt(_psi1_denom) #NxQ

@@ -346,9 +346,9 @@ except:

_dL_dS = np.einsum('nm,nmq,nmq,nq,nmq->nq',dL_dpsi1,_psi1_q,_psi1_exp_dist_sq,_psi1_common,(_psi1_dist_sq-1.))/2. # NxQ
_dL_dZ = np.einsum('nm,nmq,nmq->mq',dL_dpsi1,_psi1_q, (- _psi1_common[:,None,:] * _psi1_dist * _psi1_exp_dist_sq - (1-gamma[:,None,:])/lengthscale2*Z[None,:,:]*_psi1_exp_Z))
_dL_dlengthscale = lengthscale* np.einsum('nm,nmq,nmq->q',dL_dpsi1,_psi1_q,(_psi1_common[:,None,:]*(S[:,None,:]/lengthscale2+_psi1_dist_sq)*_psi1_exp_dist_sq + (1-gamma[:,None,:])*np.square(Z[None,:,:]/lengthscale2)*_psi1_exp_Z))

return _dL_dvariance, _dL_dlengthscale, _dL_dZ, _dL_dmu, _dL_dS, _dL_dgamma

return _dL_dvariance, _dL_dlengthscale, _dL_dZ, _dL_dmu, _dL_dS, _dL_dgamma

def _psi2compDer(dL_dpsi2, variance, lengthscale, Z, mu, S, gamma):
"""
Z - MxQ

@@ -365,14 +365,14 @@ except:

# _dL_dgamma NxQ
# _dL_dmu NxQ
# _dL_dS NxQ

lengthscale2 = np.square(lengthscale)

_psi2_Zhat = 0.5 * (Z[:, None, :] + Z[None, :, :]) # M,M,Q
_psi2_Zdist = 0.5 * (Z[:, None, :] - Z[None, :, :]) # M,M,Q
_psi2_Zdist_sq = np.square(_psi2_Zdist / lengthscale) # M,M,Q
_psi2_Z_sq_sum = (np.square(Z[:,None,:])+np.square(Z[None,:,:]))/lengthscale2 # MxMxQ

# psi2
_psi2_denom = 2.*S / lengthscale2 + 1. # NxQ
_psi2_denom_sqrt = np.sqrt(_psi2_denom)

@@ -384,7 +384,7 @@ except:

_psi2_exponent_max = np.maximum(_psi2_exponent1, _psi2_exponent2)
_psi2_exponent = _psi2_exponent_max+np.log(np.exp(_psi2_exponent1-_psi2_exponent_max) + np.exp(_psi2_exponent2-_psi2_exponent_max))
_psi2_exp_sum = _psi2_exponent.sum(axis=-1) #NxM
_psi2_q = variance*variance * np.exp(_psi2_exp_sum[:,:,:,None]-_psi2_exponent) # NxMxMxQ
_psi2_q = variance*variance * np.exp(_psi2_exp_sum[:,:,:,None]-_psi2_exponent) # NxMxMxQ
_psi2_exp_dist_sq = np.exp(-_psi2_Zdist_sq -_psi2_mudist_sq) # NxMxMxQ
_psi2_exp_Z = np.exp(-0.5*_psi2_Z_sq_sum) # MxMxQ
_psi2 = variance*variance * (np.exp(_psi2_exp_sum).sum(axis=0)) # MxM

@@ -394,5 +394,5 @@ except:

_dL_dS = np.einsum('mo,nmoq,nq,nmoq,nmoq->nq',dL_dpsi2,_psi2_q, _psi2_common, (2.*_psi2_mudist_sq-1.), _psi2_exp_dist_sq)
_dL_dZ = 2.*np.einsum('mo,nmoq,nmoq->mq',dL_dpsi2,_psi2_q,(_psi2_common[:,None,None,:]*(-_psi2_Zdist*_psi2_denom[:,None,None,:]+_psi2_mudist)*_psi2_exp_dist_sq - (1-gamma[:,None,None,:])*Z[:,None,:]/lengthscale2*_psi2_exp_Z))
_dL_dlengthscale = 2.*lengthscale* np.einsum('mo,nmoq,nmoq->q',dL_dpsi2,_psi2_q,(_psi2_common[:,None,None,:]*(S[:,None,None,:]/lengthscale2+_psi2_Zdist_sq*_psi2_denom[:,None,None,:]+_psi2_mudist_sq)*_psi2_exp_dist_sq+(1-gamma[:,None,None,:])*_psi2_Z_sq_sum*0.5/lengthscale2*_psi2_exp_Z))

return _dL_dvariance, _dL_dlengthscale, _dL_dZ, _dL_dmu, _dL_dS, _dL_dgamma
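This whole module is wrapped in try/except around the scipy.weave import: if the C backend is unavailable, pure-NumPy definitions of the same functions are bound instead. A skeleton of that pattern (the bodies are placeholders; the original uses a bare except, while ImportError is the idiomatic guard):

    import numpy as np

    try:
        # scipy.weave only exists on legacy Python-2-era SciPy; on any modern
        # install this import raises and the NumPy fallback below is bound.
        from scipy import weave

        def psicomputations(variance, Z):
            # fast path: would call weave.inline(...) with embedded C code
            raise NotImplementedError("placeholder for the weave fast path")
    except ImportError:
        def psicomputations(variance, Z):
            # pure-NumPy fallback with the same signature and return shape
            return variance * Z.dot(Z.T)

    print(psicomputations(2.0, np.ones((3, 2))).shape)  # (3, 3) via the fallback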
@@ -59,16 +59,22 @@ class RBF(Stationary):

return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior)[1]

def psi2(self, Z, variational_posterior):
return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior)[2]
return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior, return_psi2_n=False)[2]

def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
dL_dvar, dL_dlengscale = self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)[:2]
def psi2n(self, Z, variational_posterior):
return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior, return_psi2_n=True)[2]

def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior,
psi0=None, psi1=None, psi2=None, Lpsi0=None, Lpsi1=None, Lpsi2=None):
dL_dvar, dL_dlengscale = self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior, psi0=psi0, psi1=psi1, psi2=psi2, Lpsi0=Lpsi0, Lpsi1=Lpsi1, Lpsi2=Lpsi2)[:2]
self.variance.gradient = dL_dvar
self.lengthscale.gradient = dL_dlengscale

def gradients_Z_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
return self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)[2]
def gradients_Z_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior,
psi0=None, psi1=None, psi2=None, Lpsi0=None, Lpsi1=None, Lpsi2=None):
return self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior, psi0=psi0, psi1=psi1, psi2=psi2, Lpsi0=Lpsi0, Lpsi1=Lpsi1, Lpsi2=Lpsi2)[2]

def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
return self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)[3:]
def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior,
psi0=None, psi1=None, psi2=None, Lpsi0=None, Lpsi1=None, Lpsi2=None):
return self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior, psi0=psi0, psi1=psi1, psi2=psi2, Lpsi0=Lpsi0, Lpsi1=Lpsi1, Lpsi2=Lpsi2)[3:]
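The Lpsi arguments let a caller hand in the already-broadcast product dL_dpsi2 * psi2 so the NxMxM multiply is not repeated across the three gradient calls. The saving, in shapes (variance fixed at 1.0 as a stand-in):

    import numpy as np

    N, M = 100, 10
    psi2 = np.random.rand(N, M, M)      # per-point statistic, NxMxM
    dL_dpsi2 = np.random.rand(M, M)     # one MxM sensitivity, broadcast over N

    Lpsi2 = dL_dpsi2 * psi2             # NxMxM; compute once...
    grad_var = Lpsi2.sum() * 2 / 1.0    # ...then reuse it in every derivative term
    grad_per_point = np.einsum('nmo->n', Lpsi2)
    assert Lpsi2.shape == (N, M, M) and grad_per_point.shape == (N,)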
@@ -9,6 +9,7 @@ from ..inference.latent_function_inference.var_dtc_parallel import VarDTC_miniba

import logging
from GPy.models.sparse_gp_minibatch import SparseGPMiniBatch
from GPy.core.parameterization.param import Param
from GPy.core.parameterization.observable_array import ObsAr

class BayesianGPLVMMiniBatch(SparseGPMiniBatch):
"""

@@ -80,46 +81,10 @@ class BayesianGPLVMMiniBatch(SparseGPMiniBatch):

"""Get the gradients of the posterior distribution of X in its specific form."""
return X.mean.gradient, X.variance.gradient

def _inner_parameters_changed(self, kern, X, Z, likelihood, Y, Y_metadata, Lm=None, dL_dKmm=None, subset_indices=None, **kw):
posterior, log_marginal_likelihood, grad_dict, current_values, value_indices = super(BayesianGPLVMMiniBatch, self)._inner_parameters_changed(kern, X, Z, likelihood, Y, Y_metadata, Lm=Lm, dL_dKmm=dL_dKmm, subset_indices=subset_indices, **kw)

if self.has_uncertain_inputs():
current_values['meangrad'], current_values['vargrad'] = self.kern.gradients_qX_expectations(
variational_posterior=X,
Z=Z, dL_dpsi0=grad_dict['dL_dpsi0'],
dL_dpsi1=grad_dict['dL_dpsi1'],
dL_dpsi2=grad_dict['dL_dpsi2'])
else:
current_values['Xgrad'] = self.kern.gradients_X(grad_dict['dL_dKnm'], X, Z)
current_values['Xgrad'] += self.kern.gradients_X_diag(grad_dict['dL_dKdiag'], X)
if subset_indices is not None:
value_indices['Xgrad'] = subset_indices['samples']

kl_fctr = self.kl_factr
if self.has_uncertain_inputs():
if self.missing_data:
d = self.output_dim
log_marginal_likelihood -= kl_fctr*self.variational_prior.KL_divergence(X)/d
else:
log_marginal_likelihood -= kl_fctr*self.variational_prior.KL_divergence(X)

# Subsetting Variational Posterior objects makes the gradients
# empty. We need them to be 0 though:
X.mean.gradient[:] = 0
X.variance.gradient[:] = 0

self.variational_prior.update_gradients_KL(X)
if self.missing_data:
current_values['meangrad'] += kl_fctr*X.mean.gradient/d
current_values['vargrad'] += kl_fctr*X.variance.gradient/d
else:
current_values['meangrad'] += kl_fctr*X.mean.gradient
current_values['vargrad'] += kl_fctr*X.variance.gradient

if subset_indices is not None:
value_indices['meangrad'] = subset_indices['samples']
value_indices['vargrad'] = subset_indices['samples']
return posterior, log_marginal_likelihood, grad_dict, current_values, value_indices
def _inner_parameters_changed(self, kern, X, Z, likelihood, Y, Y_metadata, Lm=None, dL_dKmm=None, psi0=None, psi1=None, psi2=None, **kw):
posterior, log_marginal_likelihood, grad_dict = super(BayesianGPLVMMiniBatch, self)._inner_parameters_changed(kern, X, Z, likelihood, Y, Y_metadata, Lm=Lm, dL_dKmm=dL_dKmm,
psi0=psi0, psi1=psi1, psi2=psi2, **kw)
return posterior, log_marginal_likelihood, grad_dict

def _outer_values_update(self, full_values):
"""

@@ -128,20 +93,46 @@ class BayesianGPLVMMiniBatch(SparseGPMiniBatch):

"""
super(BayesianGPLVMMiniBatch, self)._outer_values_update(full_values)
if self.has_uncertain_inputs():
self.X.mean.gradient = full_values['meangrad']
self.X.variance.gradient = full_values['vargrad']
meangrad_tmp, vargrad_tmp = self.kern.gradients_qX_expectations(
variational_posterior=self.X,
Z=self.Z, dL_dpsi0=full_values['dL_dpsi0'],
dL_dpsi1=full_values['dL_dpsi1'],
dL_dpsi2=full_values['dL_dpsi2'],
psi0=self.psi0, psi1=self.psi1, psi2=self.psi2)

kl_fctr = self.kl_factr

self.X.mean.gradient[:] = 0
self.X.variance.gradient[:] = 0
self.variational_prior.update_gradients_KL(self.X)

if self.missing_data or not self.stochastics:
self.X.mean.gradient = kl_fctr*self.X.mean.gradient
self.X.variance.gradient = kl_fctr*self.X.variance.gradient
else:
d = self.output_dim
self.X.mean.gradient = kl_fctr*self.X.mean.gradient*self.stochastics.batchsize/d
self.X.variance.gradient = kl_fctr*self.X.variance.gradient*self.stochastics.batchsize/d
self.X.mean.gradient += meangrad_tmp
self.X.variance.gradient += vargrad_tmp

else:
self.X.gradient = full_values['Xgrad']
self.X.gradient = self.kern.gradients_X(full_values['dL_dKnm'], self.X, self.Z)
self.X.gradient += self.kern.gradients_X_diag(full_values['dL_dKdiag'], self.X)

def _outer_init_full_values(self):
if self.has_uncertain_inputs():
return dict(meangrad=np.zeros(self.X.mean.shape),
vargrad=np.zeros(self.X.variance.shape))
else:
return dict(Xgrad=np.zeros(self.X.shape))
full_values = super(BayesianGPLVMMiniBatch, self)._outer_init_full_values()
return full_values

def parameters_changed(self):
super(BayesianGPLVMMiniBatch,self).parameters_changed()
kl_fctr = self.kl_factr
if self.missing_data or not self.stochastics:
self._log_marginal_likelihood -= kl_fctr*self.variational_prior.KL_divergence(self.X)
elif self.stochastics:
d = self.output_dim
self._log_marginal_likelihood -= kl_fctr*self.variational_prior.KL_divergence(self.X)*self.stochastics.batchsize/d

if isinstance(self.inference_method, VarDTC_minibatch):
return
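When each update only sees a size-b minibatch of the d output dimensions, the KL term is rescaled by b/d so that a full sweep over the dimensions contributes the KL exactly once. The arithmetic, as a sketch:

    kl = 12.0          # KL[q(X) || p(X)] for the current variational posterior
    kl_factr = 1.0     # annealing factor on the KL term
    batchsize, output_dim = 2, 8

    per_update = kl_factr * kl * batchsize / output_dim    # 3.0 per minibatch
    updates_per_sweep = output_dim / batchsize             # 4 updates see all dims
    assert per_update * updates_per_sweep == kl_factr * kl # full KL once per sweep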
@@ -63,10 +63,10 @@ class SparseGPMiniBatch(SparseGP):

if stochastic and missing_data:
self.missing_data = True
self.stochastics = SparseGPStochastics(self, batchsize)
self.stochastics = SparseGPStochastics(self, batchsize, self.missing_data)
elif stochastic and not missing_data:
self.missing_data = False
self.stochastics = SparseGPStochastics(self, batchsize)
self.stochastics = SparseGPStochastics(self, batchsize, self.missing_data)
elif missing_data:
self.missing_data = True
self.stochastics = SparseGPMissing(self)

@@ -81,7 +81,7 @@ class SparseGPMiniBatch(SparseGP):

def has_uncertain_inputs(self):
return isinstance(self.X, VariationalPosterior)

def _inner_parameters_changed(self, kern, X, Z, likelihood, Y, Y_metadata, Lm=None, dL_dKmm=None, subset_indices=None, **kwargs):
def _inner_parameters_changed(self, kern, X, Z, likelihood, Y, Y_metadata, Lm=None, dL_dKmm=None, psi0=None, psi1=None, psi2=None, **kwargs):
"""
This is the standard part, which usually belongs in parameters_changed.

@@ -100,47 +100,13 @@ class SparseGPMiniBatch(SparseGP):

like them into this dictionary for inner use of the indices inside the
algorithm.
"""
try:
posterior, log_marginal_likelihood, grad_dict = self.inference_method.inference(kern, X, Z, likelihood, Y, Y_metadata, Lm=Lm, dL_dKmm=None, **kwargs)
except:
posterior, log_marginal_likelihood, grad_dict = self.inference_method.inference(kern, X, Z, likelihood, Y, Y_metadata)
current_values = {}
likelihood.update_gradients(grad_dict['dL_dthetaL'])
current_values['likgrad'] = likelihood.gradient.copy()
if subset_indices is None:
subset_indices = {}
if isinstance(X, VariationalPosterior):
#gradients wrt kernel
dL_dKmm = grad_dict['dL_dKmm']
kern.update_gradients_full(dL_dKmm, Z, None)
current_values['kerngrad'] = kern.gradient.copy()
kern.update_gradients_expectations(variational_posterior=X,
Z=Z,
dL_dpsi0=grad_dict['dL_dpsi0'],
dL_dpsi1=grad_dict['dL_dpsi1'],
dL_dpsi2=grad_dict['dL_dpsi2'])
current_values['kerngrad'] += kern.gradient

#gradients wrt Z
current_values['Zgrad'] = kern.gradients_X(dL_dKmm, Z)
current_values['Zgrad'] += kern.gradients_Z_expectations(
grad_dict['dL_dpsi0'],
grad_dict['dL_dpsi1'],
grad_dict['dL_dpsi2'],
Z=Z,
variational_posterior=X)
if psi2 is None:
psi2_sum_n = None
else:
#gradients wrt kernel
kern.update_gradients_diag(grad_dict['dL_dKdiag'], X)
current_values['kerngrad'] = kern.gradient.copy()
kern.update_gradients_full(grad_dict['dL_dKnm'], X, Z)
current_values['kerngrad'] += kern.gradient
kern.update_gradients_full(grad_dict['dL_dKmm'], Z, None)
current_values['kerngrad'] += kern.gradient
#gradients wrt Z
current_values['Zgrad'] = kern.gradients_X(grad_dict['dL_dKmm'], Z)
current_values['Zgrad'] += kern.gradients_X(grad_dict['dL_dKnm'].T, Z, X)
return posterior, log_marginal_likelihood, grad_dict, current_values, subset_indices
psi2_sum_n = psi2.sum(axis=0)
posterior, log_marginal_likelihood, grad_dict = self.inference_method.inference(kern, X, Z, likelihood, Y, Y_metadata, Lm=Lm,
dL_dKmm=dL_dKmm, psi0=psi0, psi1=psi1, psi2=psi2_sum_n, **kwargs)
return posterior, log_marginal_likelihood, grad_dict

def _inner_take_over_or_update(self, full_values=None, current_values=None, value_indices=None):
"""

@@ -174,7 +140,10 @@ class SparseGPMiniBatch(SparseGP):

else:
index = slice(None)
if key in full_values:
full_values[key][index] += current_values[key]
try:
full_values[key][index] += current_values[key]
except:
full_values[key] += current_values[key]
else:
full_values[key] = current_values[key]
@@ -193,9 +162,43 @@ class SparseGPMiniBatch(SparseGP):

Here you put the values, which were collected before in the right places.
E.g. set the gradients of parameters, etc.
"""
self.likelihood.gradient = full_values['likgrad']
self.kern.gradient = full_values['kerngrad']
self.Z.gradient = full_values['Zgrad']
if self.has_uncertain_inputs():
#gradients wrt kernel
dL_dKmm = full_values['dL_dKmm']
self.kern.update_gradients_full(dL_dKmm, self.Z, None)
kgrad = self.kern.gradient.copy()
self.kern.update_gradients_expectations(
variational_posterior=self.X,
Z=self.Z, dL_dpsi0=full_values['dL_dpsi0'],
dL_dpsi1=full_values['dL_dpsi1'],
dL_dpsi2=full_values['dL_dpsi2'],
psi0=self.psi0, psi1=self.psi1, psi2=self.psi2)
self.kern.gradient += kgrad

#gradients wrt Z
self.Z.gradient = self.kern.gradients_X(dL_dKmm, self.Z)
self.Z.gradient += self.kern.gradients_Z_expectations(
variational_posterior=self.X,
Z=self.Z, dL_dpsi0=full_values['dL_dpsi0'],
dL_dpsi1=full_values['dL_dpsi1'],
dL_dpsi2=full_values['dL_dpsi2'],
psi0=self.psi0, psi1=self.psi1, psi2=self.psi2)
else:
#gradients wrt kernel
self.kern.update_gradients_diag(full_values['dL_dKdiag'], self.X)
kgrad = self.kern.gradient.copy()
self.kern.update_gradients_full(full_values['dL_dKnm'], self.X, self.Z)
kgrad += self.kern.gradient
self.kern.update_gradients_full(full_values['dL_dKmm'], self.Z, None)
self.kern.gradient += kgrad
#kgrad += self.kern.gradient

#gradients wrt Z
self.Z.gradient = self.kern.gradients_X(full_values['dL_dKmm'], self.Z)
self.Z.gradient += self.kern.gradients_X(full_values['dL_dKnm'].T, self.Z, self.X)

self.likelihood.update_gradients(full_values['dL_dthetaL'])

def _outer_init_full_values(self):
"""

@@ -210,7 +213,15 @@ class SparseGPMiniBatch(SparseGP):

to initialize the gradients for the mean and the variance in order to
have the full gradient for indexing)
"""
return {}
retd = dict(dL_dKmm=np.zeros((self.Z.shape[0], self.Z.shape[0])))
if self.has_uncertain_inputs():
retd.update(dict(dL_dpsi0=np.zeros(self.X.shape[0]),
dL_dpsi1=np.zeros((self.X.shape[0], self.Z.shape[0])),
dL_dpsi2=np.zeros((self.X.shape[0], self.Z.shape[0], self.Z.shape[0]))))
else:
retd.update({'dL_dKdiag': np.zeros(self.X.shape[0]),
'dL_dKnm': np.zeros((self.X.shape[0], self.Z.shape[0]))})
return retd

def _outer_loop_for_missing_data(self):
Lm = None
@@ -232,28 +243,36 @@ class SparseGPMiniBatch(SparseGP):

print(message, end=' ')

for d, ninan in self.stochastics.d:

if not self.stochastics:
print(' '*(len(message)) + '\r', end=' ')
message = m_f(d)
print(message, end=' ')

posterior, log_marginal_likelihood, \
grad_dict, current_values, value_indices = self._inner_parameters_changed(
psi0ni = self.psi0[ninan]
psi1ni = self.psi1[ninan]
if self.has_uncertain_inputs():
psi2ni = self.psi2[ninan]
value_indices = dict(outputs=d, samples=ninan, dL_dpsi0=ninan, dL_dpsi1=ninan, dL_dpsi2=ninan)
else:
psi2ni = None
value_indices = dict(outputs=d, samples=ninan, dL_dKdiag=ninan, dL_dKnm=ninan)

posterior, log_marginal_likelihood, grad_dict = self._inner_parameters_changed(
self.kern, self.X[ninan],
self.Z, self.likelihood,
self.Y_normalized[ninan][:, d], self.Y_metadata,
Lm, dL_dKmm,
subset_indices=dict(outputs=d, samples=ninan))
psi0=psi0ni, psi1=psi1ni, psi2=psi2ni)

self._inner_take_over_or_update(self.full_values, current_values, value_indices)
self._inner_values_update(current_values)
# Fill out the full values by adding in the appropriate grad_dict
# values
self._inner_take_over_or_update(self.full_values, grad_dict, value_indices)
self._inner_values_update(grad_dict) # What is this for? -> MRD

Lm = posterior.K_chol
dL_dKmm = grad_dict['dL_dKmm']
woodbury_inv[:, :, d] = posterior.woodbury_inv[:,:,None]
woodbury_vector[:, d] = posterior.woodbury_vector
self._log_marginal_likelihood += log_marginal_likelihood

if not self.stochastics:
print('')

@@ -261,10 +280,10 @@ class SparseGPMiniBatch(SparseGP):

self.posterior = Posterior(woodbury_inv=woodbury_inv, woodbury_vector=woodbury_vector,
K=posterior._K, mean=None, cov=None, K_chol=posterior.K_chol)
self._outer_values_update(self.full_values)
if self.has_uncertain_inputs():
self.kern.return_psi2_n = False

def _outer_loop_without_missing_data(self):
self._log_marginal_likelihood = 0

if self.posterior is None:
woodbury_inv = np.zeros((self.num_inducing, self.num_inducing, self.output_dim))
woodbury_vector = np.zeros((self.num_inducing, self.output_dim))

@@ -272,17 +291,16 @@ class SparseGPMiniBatch(SparseGP):

woodbury_inv = self.posterior._woodbury_inv
woodbury_vector = self.posterior._woodbury_vector

d = self.stochastics.d
posterior, log_marginal_likelihood, \
grad_dict, self.full_values, _ = self._inner_parameters_changed(
d = self.stochastics.d[0][0]
posterior, log_marginal_likelihood, grad_dict = self._inner_parameters_changed(
self.kern, self.X,
self.Z, self.likelihood,
self.Y_normalized[:, d], self.Y_metadata)
self.grad_dict = grad_dict

self._log_marginal_likelihood += log_marginal_likelihood
self._log_marginal_likelihood = log_marginal_likelihood

self._outer_values_update(self.full_values)
self._outer_values_update(self.grad_dict)

woodbury_inv[:, :, d] = posterior.woodbury_inv[:, :, None]
woodbury_vector[:, d] = posterior.woodbury_vector

@@ -291,10 +309,23 @@ class SparseGPMiniBatch(SparseGP):

K=posterior._K, mean=None, cov=None, K_chol=posterior.K_chol)

def parameters_changed(self):
#Compute the psi statistics for N once, but don't sum out N in psi2
if self.has_uncertain_inputs():
#psi0 = ObsAr(self.kern.psi0(self.Z, self.X))
#psi1 = ObsAr(self.kern.psi1(self.Z, self.X))
#psi2 = ObsAr(self.kern.psi2(self.Z, self.X))
self.psi0 = self.kern.psi0(self.Z, self.X)
self.psi1 = self.kern.psi1(self.Z, self.X)
self.psi2 = self.kern.psi2n(self.Z, self.X)
else:
self.psi0 = self.kern.Kdiag(self.X)
self.psi1 = self.kern.K(self.X, self.Z)
self.psi2 = None

if self.missing_data:
self._outer_loop_for_missing_data()
elif self.stochastics:
self._outer_loop_without_missing_data()
else:
self.posterior, self._log_marginal_likelihood, self.grad_dict, self.full_values, _ = self._inner_parameters_changed(self.kern, self.X, self.Z, self.likelihood, self.Y_normalized, self.Y_metadata)
self._outer_values_update(self.full_values)
self.posterior, self._log_marginal_likelihood, self.grad_dict = self._inner_parameters_changed(self.kern, self.X, self.Z, self.likelihood, self.Y_normalized, self.Y_metadata)
self._outer_values_update(self.grad_dict)
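parameters_changed now computes psi0/psi1/psi2n once over all N, and the missing-data loop slices rows out per output group before summing psi2 over the subset. A shape-level sketch of that slicing:

    import numpy as np

    N, M = 6, 3
    psi0 = np.random.rand(N)
    psi1 = np.random.rand(N, M)
    psi2n = np.random.rand(N, M, M)          # kept unsummed: NxMxM

    ninan = np.array([True, False, True, True, False, True])  # rows observed for this group
    psi0ni, psi1ni = psi0[ninan], psi1[ninan]
    psi2_sum_n = psi2n[ninan].sum(axis=0)    # what VarDTC actually consumes: MxM
    assert psi2_sum_n.shape == (M, M) and psi1ni.shape == (ninan.sum(), M)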