From 12c335c62e65593b9741028b81d053400f674195 Mon Sep 17 00:00:00 2001 From: Alan Saul Date: Tue, 25 Aug 2015 17:30:09 +0300 Subject: [PATCH 01/13] Fixed array2string bug for N > 1000 default printing --- GPy/inference/optimization/stochastics.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/GPy/inference/optimization/stochastics.py b/GPy/inference/optimization/stochastics.py index 645c5f64..f7ad1245 100644 --- a/GPy/inference/optimization/stochastics.py +++ b/GPy/inference/optimization/stochastics.py @@ -5,7 +5,7 @@ class StochasticStorage(object): ''' This is a container for holding the stochastic parameters, such as subset indices or step length and so on. - + self.d has to be a list of lists: [dimension indices, nan indices for those dimensions] so that the minibatches can be used as efficiently as possible.10 @@ -38,16 +38,17 @@ class SparseGPMissing(StochasticStorage): import numpy as np self.Y = model.Y_normalized bdict = {} + #For N > 1000 array2string default crops + opt = np.get_printoptions() + np.set_printoptions(threshold='nan') for d in range(self.Y.shape[1]): - inan = np.isnan(self.Y[:, d]) - arr_str = np.array2string(inan, - np.inf, 0, - True, '', - formatter={'bool':lambda x: '1' if x else '0'}) + inan = np.isnan(self.Y)[:, d] + arr_str = np.array2string(~inan, np.inf, 0, True, '', formatter={'bool':lambda x: '1' if x else '0'}) try: bdict[arr_str][0].append(d) except: bdict[arr_str] = [[d], ~inan] + np.set_printoptions(**opt) self.d = bdict.values() class SparseGPStochastics(StochasticStorage): @@ -70,16 +71,19 @@ class SparseGPStochastics(StochasticStorage): import numpy as np self.d = np.random.choice(self.output_dim, size=self.batchsize, replace=False) bdict = {} + opt = np.get_printoptions() + np.set_printoptions(threshold='nan') for d in self.d: inan = np.isnan(self.Y[:, d]) - arr_str = int(np.array2string(inan, - np.inf, 0, - True, '', + arr_str = int(np.array2string(inan, + np.inf, 0, + True, '', formatter={'bool':lambda x: '1' if x else '0'}), 2) try: bdict[arr_str][0].append(d) except: bdict[arr_str] = [[d], ~inan] + np.set_printoptions(**opt) self.d = bdict.values() def reset(self): From 4143f005403808d11928b9772a9a40679a4d441e Mon Sep 17 00:00:00 2001 From: Alan Saul Date: Thu, 27 Aug 2015 17:56:08 +0300 Subject: [PATCH 02/13] Optimizing missing data model, needs tidying but now much faster --- .../latent_function_inference/var_dtc.py | 27 +++-- GPy/kern/_src/kern.py | 27 +++-- GPy/kern/_src/kernel_slice_operations.py | 18 ++- GPy/kern/_src/psi_comp/__init__.py | 21 ++-- GPy/kern/_src/psi_comp/linear_psi_comp.py | 17 +-- GPy/kern/_src/psi_comp/rbf_psi_comp.py | 29 +++-- GPy/kern/_src/psi_comp/ssrbf_psi_comp.py | 108 ++++++++--------- GPy/kern/_src/rbf.py | 17 +-- GPy/models/bayesian_gplvm_minibatch.py | 95 ++++++++------- GPy/models/sparse_gp_minibatch.py | 111 +++++++++++------- 10 files changed, 275 insertions(+), 195 deletions(-) diff --git a/GPy/inference/latent_function_inference/var_dtc.py b/GPy/inference/latent_function_inference/var_dtc.py index 97d8dfe3..e50daa24 100644 --- a/GPy/inference/latent_function_inference/var_dtc.py +++ b/GPy/inference/latent_function_inference/var_dtc.py @@ -64,9 +64,7 @@ class VarDTC(LatentFunctionInference): def get_VVTfactor(self, Y, prec): return Y * prec # TODO chache this, and make it effective - - - def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None, Lm=None, dL_dKmm=None): + def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None, Lm=None, dL_dKmm=None, 
psi0=None, psi1=None, psi2=None): _, output_dim = Y.shape uncertain_inputs = isinstance(X, VariationalPosterior) @@ -95,17 +93,28 @@ class VarDTC(LatentFunctionInference): # The rather complex computations of A, and the psi stats if uncertain_inputs: - psi0 = kern.psi0(Z, X) - psi1 = kern.psi1(Z, X) + if psi0 is None: + psi0 = kern.psi0(Z, X) + if psi1 is None: + psi1 = kern.psi1(Z, X) if het_noise: - psi2_beta = np.sum([kern.psi2(Z,X[i:i+1,:]) * beta_i for i,beta_i in enumerate(beta)],0) + if psi2 is None: + assert len(psi2.shape) == 3 # Need to have not summed out N + #FIXME: Need testing + psi2_beta = np.sum([psi2[X[i:i+1,:], :, :] * beta_i for i,beta_i in enumerate(beta)],0) + else: + psi2_beta = np.sum([kern.psi2(Z,X[i:i+1,:]) * beta_i for i,beta_i in enumerate(beta)],0) else: - psi2_beta = kern.psi2(Z,X) * beta + if psi2 is None: + psi2 = kern.psi2(Z,X) + psi2_beta = psi2 * beta LmInv = dtrtri(Lm) A = LmInv.dot(psi2_beta.dot(LmInv.T)) else: - psi0 = kern.Kdiag(X) - psi1 = kern.K(X, Z) + if psi0 is None: + psi0 = kern.Kdiag(X) + if psi1 is None: + psi1 = kern.K(X, Z) if het_noise: tmp = psi1 * (np.sqrt(beta)) else: diff --git a/GPy/kern/_src/kern.py b/GPy/kern/_src/kern.py index 5af3fb1c..fb12030b 100644 --- a/GPy/kern/_src/kern.py +++ b/GPy/kern/_src/kern.py @@ -59,7 +59,12 @@ class Kern(Parameterized): self._sliced_X = 0 self.useGPU = self._support_GPU and useGPU self._return_psi2_n_flag = ObsAr(np.zeros(1)).astype(bool) - + + #FIXME: temporary solution + from ...core.parameterization.lists_and_dicts import ObserverList + self._return_psi2_n_flag.observers = ObserverList() + self._return_psi2_n_flag._update_on = True + from .psi_comp import PSICOMP_GH self.psicomp = PSICOMP_GH() @@ -97,7 +102,7 @@ class Kern(Parameterized): def psi1(self, Z, variational_posterior): return self.psicomp.psicomputations(self, Z, variational_posterior)[1] def psi2(self, Z, variational_posterior): - return self.psicomp.psicomputations(self, Z, variational_posterior)[2] + return self.psicomp.psicomputations(self, Z, variational_posterior, return_psi2_n=self.return_psi2_n)[2] def gradients_X(self, dL_dK, X, X2): raise NotImplementedError def gradients_X_diag(self, dL_dKdiag, X): @@ -111,7 +116,8 @@ class Kern(Parameterized): """Set the gradients of all parameters when doing full (N) inference.""" raise NotImplementedError - def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): + def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior, + psi0=None, psi1=None, psi2=None): """ Set the gradients of all parameters when doing inference with uncertain inputs, using expectations of the kernel. @@ -122,22 +128,27 @@ class Kern(Parameterized): dL_dpsi1 * dpsi1_d{theta_i} + dL_dpsi2 * dpsi2_d{theta_i} """ - dtheta = self.psicomp.psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior)[0] + dtheta = self.psicomp.psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior, + psi0=psi0, psi1=psi1, psi2=psi2)[0] self.gradient[:] = dtheta - def gradients_Z_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): + def gradients_Z_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior, + psi0=None, psi1=None, psi2=None): """ Returns the derivative of the objective wrt Z, using the chain rule through the expectation variables. 
""" - return self.psicomp.psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior)[1] + return self.psicomp.psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior, + psi0=psi0, psi1=psi1, psi2=psi2)[1] - def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): + def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior, + psi0=None, psi1=None, psi2=None): """ Compute the gradients wrt the parameters of the variational distruibution q(X), chain-ruling via the expectations of the kernel """ - return self.psicomp.psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior)[2:] + return self.psicomp.psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior, + psi0=psi0, psi1=psi1, psi2=psi2)[2:] def plot(self, x=None, fignum=None, ax=None, title=None, plot_limits=None, resolution=None, **mpl_kwargs): """ diff --git a/GPy/kern/_src/kernel_slice_operations.py b/GPy/kern/_src/kernel_slice_operations.py index 3473ffce..a2058ca8 100644 --- a/GPy/kern/_src/kernel_slice_operations.py +++ b/GPy/kern/_src/kernel_slice_operations.py @@ -116,25 +116,31 @@ def _slice_psi(f): def _slice_update_gradients_expectations(f): @wraps(f) - def wrap(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): + def wrap(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior, + psi0=None, psi1=None, psi2=None): with _Slice_wrap(self, Z, variational_posterior) as s: - ret = f(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, s.X, s.X2) + ret = f(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, s.X, s.X2, + psi0=psi0, psi1=psi1, psi2=psi2) return ret return wrap def _slice_gradients_Z_expectations(f): @wraps(f) - def wrap(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): + def wrap(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior, + psi0=None, psi1=None, psi2=None): with _Slice_wrap(self, Z, variational_posterior) as s: - ret = s.handle_return_array(f(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, s.X, s.X2)) + ret = s.handle_return_array(f(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, s.X, s.X2, + psi0=psi0, psi1=psi1, psi2=psi2)) return ret return wrap def _slice_gradients_qX_expectations(f): @wraps(f) - def wrap(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): + def wrap(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior, + psi0=None, psi1=None, psi2=None): with _Slice_wrap(self, variational_posterior, Z) as s: - ret = list(f(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, s.X2, s.X)) + ret = list(f(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, s.X2, s.X, + psi0=psi0, psi1=psi1, psi2=psi2)) r2 = ret[:2] ret[0] = s.handle_return_array(r2[0]) ret[1] = s.handle_return_array(r2[1]) diff --git a/GPy/kern/_src/psi_comp/__init__.py b/GPy/kern/_src/psi_comp/__init__.py index 6d7222be..6849ed81 100644 --- a/GPy/kern/_src/psi_comp/__init__.py +++ b/GPy/kern/_src/psi_comp/__init__.py @@ -12,18 +12,22 @@ from .gaussherm import PSICOMP_GH class PSICOMP_RBF(Pickleable): @Cache_this(limit=10, ignore_args=(0,)) - def psicomputations(self, variance, lengthscale, Z, variational_posterior): + def psicomputations(self, variance, lengthscale, Z, variational_posterior, return_psi2_n=False): if isinstance(variational_posterior, variational.NormalPosterior): - return rbf_psi_comp.psicomputations(variance, lengthscale, Z, variational_posterior) + return rbf_psi_comp.psicomputations(variance, lengthscale, Z, variational_posterior, return_psi2_n=return_psi2_n) elif 
isinstance(variational_posterior, variational.SpikeAndSlabPosterior): + if return_psi2_n: + raise NotImplementedError('However this function seems to return it by default') return ssrbf_psi_comp.psicomputations(variance, lengthscale, Z, variational_posterior) else: raise ValueError("unknown distriubtion received for psi-statistics") @Cache_this(limit=10, ignore_args=(0,1,2,3)) - def psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior): + def psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior, + psi0=None, psi1=None, psi2=None): if isinstance(variational_posterior, variational.NormalPosterior): - return rbf_psi_comp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior) + return rbf_psi_comp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior, + psi0=psi0, psi1=psi1, psi2=psi2) elif isinstance(variational_posterior, variational.SpikeAndSlabPosterior): return ssrbf_psi_comp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior) else: @@ -35,7 +39,9 @@ class PSICOMP_RBF(Pickleable): class PSICOMP_Linear(Pickleable): @Cache_this(limit=10, ignore_args=(0,)) - def psicomputations(self, variance, Z, variational_posterior): + def psicomputations(self, variance, Z, variational_posterior, return_psi2_n=False): + if return_psi2_n: + raise NotImplementedError if isinstance(variational_posterior, variational.NormalPosterior): return linear_psi_comp.psicomputations(variance, Z, variational_posterior) elif isinstance(variational_posterior, variational.SpikeAndSlabPosterior): @@ -44,8 +50,9 @@ class PSICOMP_Linear(Pickleable): raise ValueError("unknown distriubtion received for psi-statistics") @Cache_this(limit=10, ignore_args=(0,1,2,3)) - def psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, Z, variational_posterior): + def psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, Z, variational_posterior, psi0=None, psi1=None, psi2=None): if isinstance(variational_posterior, variational.NormalPosterior): + #Should pass psi in return linear_psi_comp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, Z, variational_posterior) elif isinstance(variational_posterior, variational.SpikeAndSlabPosterior): return sslinear_psi_comp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, Z, variational_posterior) @@ -53,4 +60,4 @@ class PSICOMP_Linear(Pickleable): raise ValueError("unknown distriubtion received for psi-statistics") def _setup_observers(self): - pass \ No newline at end of file + pass diff --git a/GPy/kern/_src/psi_comp/linear_psi_comp.py b/GPy/kern/_src/psi_comp/linear_psi_comp.py index 50090428..cd5d8fa0 100644 --- a/GPy/kern/_src/psi_comp/linear_psi_comp.py +++ b/GPy/kern/_src/psi_comp/linear_psi_comp.py @@ -8,7 +8,7 @@ The package for the Psi statistics computation of the linear kernel for Bayesian import numpy as np from ....util.linalg import tdot -def psicomputations(variance, Z, variational_posterior): +def psicomputations(variance, Z, variational_posterior, return_psi2_n=False): """ Compute psi-statistics for ss-linear kernel """ @@ -22,7 +22,10 @@ def psicomputations(variance, Z, variational_posterior): psi0 = (variance*(np.square(mu)+S)).sum(axis=1) psi1 = np.dot(mu,(variance*Z).T) - psi2 = np.dot(S.sum(axis=0)*np.square(variance)*Z,Z.T)+ tdot(psi1.T) + if 
sum_N_psi2: + psi2 = np.dot(S.sum(axis=0)*np.square(variance)*Z,Z.T)+ tdot(psi1.T) + else: + raise NotImplementedError return psi0, psi1, psi2 @@ -40,7 +43,7 @@ def psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, Z, variati dL_dmu += 2.*dL_dpsi0_var*mu+np.dot(dL_dpsi1,Z)*variance dL_dS += dL_dpsi0_var dL_dZ += dL_dpsi1_mu*variance - + return dL_dvar, dL_dZ, dL_dmu, dL_dS def _psi2computations(dL_dpsi2, variance, Z, mu, S): @@ -56,7 +59,7 @@ def _psi2computations(dL_dpsi2, variance, Z, mu, S): # _psi2_dZ MxQ # _psi2_dmu NxQ # _psi2_dS NxQ - + variance2 = np.square(variance) common_sum = np.dot(mu,(variance*Z).T) Z_expect = (np.dot(dL_dpsi2,Z)*Z).sum(axis=0) @@ -66,12 +69,12 @@ def _psi2computations(dL_dpsi2, variance, Z, mu, S): Z1_expect = np.dot(dL_dpsi2T,Z) dL_dvar = 2.*S.sum(axis=0)*variance*Z_expect+(common_expect*mu).sum(axis=0) - + dL_dmu = common_expect*variance - + dL_dS = np.empty(S.shape) dL_dS[:] = Z_expect*variance2 - + dL_dZ = variance2*S.sum(axis=0)*Z1_expect+np.dot(Z2_expect.T,variance*mu) return dL_dvar, dL_dmu, dL_dS, dL_dZ diff --git a/GPy/kern/_src/psi_comp/rbf_psi_comp.py b/GPy/kern/_src/psi_comp/rbf_psi_comp.py index 16bada72..de958a37 100644 --- a/GPy/kern/_src/psi_comp/rbf_psi_comp.py +++ b/GPy/kern/_src/psi_comp/rbf_psi_comp.py @@ -5,7 +5,7 @@ The module for psi-statistics for RBF kernel import numpy as np from GPy.util.caching import Cacher -def psicomputations(variance, lengthscale, Z, variational_posterior): +def psicomputations(variance, lengthscale, Z, variational_posterior, return_psi2_n=False): """ Z - MxQ mu - NxQ @@ -21,7 +21,9 @@ def psicomputations(variance, lengthscale, Z, variational_posterior): psi0 = np.empty(mu.shape[0]) psi0[:] = variance psi1 = _psi1computations(variance, lengthscale, Z, mu, S) - psi2 = _psi2computations(variance, lengthscale, Z, mu, S).sum(axis=0) + psi2 = _psi2computations(variance, lengthscale, Z, mu, S) + if not return_psi2_n: + psi2 = psi2.sum(axis=0) return psi0, psi1, psi2 def __psi1computations(variance, lengthscale, Z, mu, S): @@ -66,11 +68,12 @@ def __psi2computations(variance, lengthscale, Z, mu, S): _psi2 = variance*variance*np.exp(_psi2_logdenom[:,None,None]+_psi2_exp1[None,:,:]+_psi2_exp2) return _psi2 -def psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior): +def psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior, + psi0=None, psi1=None, psi2=None): ARD = (len(lengthscale)!=1) - dvar_psi1, dl_psi1, dZ_psi1, dmu_psi1, dS_psi1 = _psi1compDer(dL_dpsi1, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance) - dvar_psi2, dl_psi2, dZ_psi2, dmu_psi2, dS_psi2 = _psi2compDer(dL_dpsi2, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance) + dvar_psi1, dl_psi1, dZ_psi1, dmu_psi1, dS_psi1 = _psi1compDer(dL_dpsi1, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance, psi1=psi1) + dvar_psi2, dl_psi2, dZ_psi2, dmu_psi2, dS_psi2 = _psi2compDer(dL_dpsi2, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance, psi2=psi2) dL_dvar = np.sum(dL_dpsi0) + dvar_psi1 + dvar_psi2 @@ -84,7 +87,7 @@ def psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscal return dL_dvar, dL_dlengscale, dL_dZ, dL_dmu, dL_dS -def _psi1compDer(dL_dpsi1, variance, lengthscale, Z, mu, S): +def __psi1compDer(dL_dpsi1, variance, lengthscale, Z, mu, S, psi1=None): """ dL_dpsi1 - NxM Z - MxQ @@ -103,8 
+106,9 @@ def _psi1compDer(dL_dpsi1, variance, lengthscale, Z, mu, S): lengthscale2 = np.square(lengthscale) - _psi1 = _psi1computations(variance, lengthscale, Z, mu, S) - Lpsi1 = dL_dpsi1*_psi1 + if psi1 is None: + psi1 = _psi1computations(variance, lengthscale, Z, mu, S) + Lpsi1 = dL_dpsi1*psi1 Zmu = Z[None,:,:]-mu[:,None,:] # NxMxQ denom = 1./(S+lengthscale2) Zmu2_denom = np.square(Zmu)*denom[:,None,:] #NxMxQ @@ -116,7 +120,7 @@ def _psi1compDer(dL_dpsi1, variance, lengthscale, Z, mu, S): return _dL_dvar, _dL_dl, _dL_dZ, _dL_dmu, _dL_dS -def _psi2compDer(dL_dpsi2, variance, lengthscale, Z, mu, S): +def __psi2compDer(dL_dpsi2, variance, lengthscale, Z, mu, S, psi2=None): """ Z - MxQ mu - NxQ @@ -137,8 +141,9 @@ def _psi2compDer(dL_dpsi2, variance, lengthscale, Z, mu, S): denom = 1./(2*S+lengthscale2) denom2 = np.square(denom) - _psi2 = _psi2computations(variance, lengthscale, Z, mu, S) # NxMxM - Lpsi2 = dL_dpsi2*_psi2 # dL_dpsi2 is MxM, using broadcast to multiply N out + if psi2 is None: + psi2 = _psi2computations(variance, lengthscale, Z, mu, S) # NxMxM + Lpsi2 = dL_dpsi2*psi2 # dL_dpsi2 is MxM, using broadcast to multiply N out Lpsi2sum = np.einsum('nmo->n',Lpsi2) #N Lpsi2Z = np.einsum('nmo,oq->nq',Lpsi2,Z) #NxQ Lpsi2Z2 = np.einsum('nmo,oq,oq->nq',Lpsi2,Z,Z) #NxQ @@ -158,3 +163,5 @@ def _psi2compDer(dL_dpsi2, variance, lengthscale, Z, mu, S): _psi1computations = Cacher(__psi1computations, limit=5) _psi2computations = Cacher(__psi2computations, limit=5) +_psi1compDer = Cacher(__psi1compDer, limit=5) +_psi2compDer = Cacher(__psi2compDer, limit=5) diff --git a/GPy/kern/_src/psi_comp/ssrbf_psi_comp.py b/GPy/kern/_src/psi_comp/ssrbf_psi_comp.py index f6a24c86..10ea95e4 100644 --- a/GPy/kern/_src/psi_comp/ssrbf_psi_comp.py +++ b/GPy/kern/_src/psi_comp/ssrbf_psi_comp.py @@ -9,7 +9,7 @@ import numpy as np try: from scipy import weave - + def _psicomputations(variance, lengthscale, Z, variational_posterior): """ Z - MxQ @@ -23,7 +23,7 @@ try: mu = variational_posterior.mean S = variational_posterior.variance gamma = variational_posterior.binary_prob - + N,M,Q = mu.shape[0],Z.shape[0],mu.shape[1] l2 = np.square(lengthscale) log_denom1 = np.log(S/l2+1) @@ -35,13 +35,13 @@ try: psi0[:] = variance psi1 = np.empty((N,M)) psi2n = np.empty((N,M,M)) - + from ....util.misc import param_to_array S = param_to_array(S) mu = param_to_array(mu) gamma = param_to_array(gamma) Z = param_to_array(Z) - + support_code = """ #include """ @@ -56,11 +56,11 @@ try: double lq = l2(q); double Zm1q = Z(m1,q); double Zm2q = Z(m2,q); - + if(m2==0) { // Compute Psi_1 double muZ = mu(n,q)-Z(m1,q); - + double psi1_exp1 = log_gamma(n,q) - (muZ*muZ/(Snq+lq) +log_denom1(n,q))/2.; double psi1_exp2 = log_gamma1(n,q) -Zm1q*Zm1q/(2.*lq); log_psi1 += (psi1_exp1>psi1_exp2)?psi1_exp1+log1p(exp(psi1_exp2-psi1_exp1)):psi1_exp2+log1p(exp(psi1_exp1-psi1_exp2)); @@ -69,10 +69,10 @@ try: double muZhat = mu(n,q) - (Zm1q+Zm2q)/2.; double Z2 = Zm1q*Zm1q+ Zm2q*Zm2q; double dZ = Zm1q - Zm2q; - + double psi2_exp1 = dZ*dZ/(-4.*lq)-muZhat*muZhat/(2.*Snq+lq) - log_denom2(n,q)/2. 
+ log_gamma(n,q); double psi2_exp2 = log_gamma1(n,q) - Z2/(2.*lq); - log_psi2_n += (psi2_exp1>psi2_exp2)?psi2_exp1+log1p(exp(psi2_exp2-psi2_exp1)):psi2_exp2+log1p(exp(psi2_exp1-psi2_exp2)); + log_psi2_n += (psi2_exp1>psi2_exp2)?psi2_exp1+log1p(exp(psi2_exp2-psi2_exp1)):psi2_exp2+log1p(exp(psi2_exp1-psi2_exp2)); } double exp_psi2_n = exp(log_psi2_n); psi2n(n,m1,m2) = variance*variance*exp_psi2_n; @@ -83,18 +83,18 @@ try: } """ weave.inline(code, support_code=support_code, arg_names=['psi1','psi2n','N','M','Q','variance','l2','Z','mu','S','gamma','log_denom1','log_denom2','log_gamma','log_gamma1'], type_converters=weave.converters.blitz) - + psi2 = psi2n.sum(axis=0) return psi0,psi1,psi2,psi2n - + from GPy.util.caching import Cacher psicomputations = Cacher(_psicomputations, limit=1) - + def psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior): ARD = (len(lengthscale)!=1) - + _,psi1,_,psi2n = psicomputations(variance, lengthscale, Z, variational_posterior) - + mu = variational_posterior.mean S = variational_posterior.variance gamma = variational_posterior.binary_prob @@ -105,7 +105,7 @@ try: log_gamma = np.log(gamma) log_gamma1 = np.log(1.-gamma) variance = float(variance) - + dvar = np.zeros(1) dmu = np.zeros((N,Q)) dS = np.zeros((N,Q)) @@ -113,13 +113,13 @@ try: dl = np.zeros(Q) dZ = np.zeros((M,Q)) dvar += np.sum(dL_dpsi0) - + from ....util.misc import param_to_array S = param_to_array(S) mu = param_to_array(mu) gamma = param_to_array(gamma) Z = param_to_array(Z) - + support_code = """ #include """ @@ -136,16 +136,16 @@ try: double Zm2q = Z(m2,q); double gnq = gamma(n,q); double mu_nq = mu(n,q); - + if(m2==0) { - // Compute Psi_1 + // Compute Psi_1 double lpsi1 = psi1(n,m1)*dL_dpsi1(n,m1); if(q==0) {dvar(0) += lpsi1/variance;} - + double Zmu = Zm1q - mu_nq; double denom = Snq+lq; double Zmu2_denom = Zmu*Zmu/denom; - + double exp1 = log_gamma(n,q)-(Zmu*Zmu/(Snq+lq)+log_denom1(n,q))/(2.); double exp2 = log_gamma1(n,q)-Zm1q*Zm1q/(2.*lq); double d_exp1,d_exp2; @@ -157,7 +157,7 @@ try: d_exp2 = 1.; } double exp_sum = d_exp1+d_exp2; - + dmu(n,q) += lpsi1*Zmu*d_exp1/(denom*exp_sum); dS(n,q) += lpsi1*(Zmu2_denom-1.)*d_exp1/(denom*exp_sum)/2.; dgamma(n,q) += lpsi1*(d_exp1/gnq-d_exp2/(1.-gnq))/exp_sum; @@ -167,13 +167,13 @@ try: // Compute Psi_2 double lpsi2 = psi2n(n,m1,m2)*dL_dpsi2(m1,m2); if(q==0) {dvar(0) += lpsi2*2/variance;} - + double dZm1m2 = Zm1q - Zm2q; double Z2 = Zm1q*Zm1q+Zm2q*Zm2q; double muZhat = mu_nq - (Zm1q + Zm2q)/2.; double denom = 2.*Snq+lq; double muZhat2_denom = muZhat*muZhat/denom; - + double exp1 = dZm1m2*dZm1m2/(-4.*lq)-muZhat*muZhat/(2.*Snq+lq) - log_denom2(n,q)/2. 
+ log_gamma(n,q); double exp2 = log_gamma1(n,q) - Z2/(2.*lq); double d_exp1,d_exp2; @@ -185,23 +185,23 @@ try: d_exp2 = 1.; } double exp_sum = d_exp1+d_exp2; - + dmu(n,q) += -2.*lpsi2*muZhat/denom*d_exp1/exp_sum; dS(n,q) += lpsi2*(2.*muZhat2_denom-1.)/denom*d_exp1/exp_sum; dgamma(n,q) += lpsi2*(d_exp1/gnq-d_exp2/(1.-gnq))/exp_sum; dl(q) += lpsi2*(((Snq/lq+muZhat2_denom)/denom+dZm1m2*dZm1m2/(4.*lq*lq))*d_exp1+Z2/(2.*lq*lq)*d_exp2)/exp_sum; - dZ(m1,q) += 2.*lpsi2*((muZhat/denom-dZm1m2/(2*lq))*d_exp1-Zm1q/lq*d_exp2)/exp_sum; + dZ(m1,q) += 2.*lpsi2*((muZhat/denom-dZm1m2/(2*lq))*d_exp1-Zm1q/lq*d_exp2)/exp_sum; } } } } """ weave.inline(code, support_code=support_code, arg_names=['dL_dpsi1','dL_dpsi2','psi1','psi2n','N','M','Q','variance','l2','Z','mu','S','gamma','log_denom1','log_denom2','log_gamma','log_gamma1','dvar','dl','dmu','dS','dgamma','dZ'], type_converters=weave.converters.blitz) - + dl *= 2.*lengthscale if not ARD: dl = dl.sum() - + return dvar, dl, dZ, dmu, dS, dgamma except: @@ -219,13 +219,13 @@ except: mu = variational_posterior.mean S = variational_posterior.variance gamma = variational_posterior.binary_prob - + psi0 = np.empty(mu.shape[0]) psi0[:] = variance psi1 = _psi1computations(variance, lengthscale, Z, mu, S, gamma) psi2 = _psi2computations(variance, lengthscale, Z, mu, S, gamma) return psi0, psi1, psi2 - + def _psi1computations(variance, lengthscale, Z, mu, S, gamma): """ Z - MxQ @@ -236,9 +236,9 @@ except: # here are the "statistics" for psi1 # Produced intermediate results: # _psi1 NxM - + lengthscale2 = np.square(lengthscale) - + # psi1 _psi1_denom = S[:, None, :] / lengthscale2 + 1. # Nx1xQ _psi1_denom_sqrt = np.sqrt(_psi1_denom) #Nx1xQ @@ -251,9 +251,9 @@ except: _psi1_exponent = _psi1_exponent_max+np.log(np.exp(_psi1_exponent1-_psi1_exponent_max) + np.exp(_psi1_exponent2-_psi1_exponent_max)) #NxMxQ _psi1_exp_sum = _psi1_exponent.sum(axis=-1) #NxM _psi1 = variance * np.exp(_psi1_exp_sum) # NxM - + return _psi1 - + def _psi2computations(variance, lengthscale, Z, mu, S, gamma): """ Z - MxQ @@ -264,14 +264,14 @@ except: # here are the "statistics" for psi2 # Produced intermediate results: # _psi2 MxM - + lengthscale2 = np.square(lengthscale) - + _psi2_Zhat = 0.5 * (Z[:, None, :] + Z[None, :, :]) # M,M,Q _psi2_Zdist = 0.5 * (Z[:, None, :] - Z[None, :, :]) # M,M,Q _psi2_Zdist_sq = np.square(_psi2_Zdist / lengthscale) # M,M,Q _psi2_Z_sq_sum = (np.square(Z[:,None,:])+np.square(Z[None,:,:]))/lengthscale2 # MxMxQ - + # psi2 _psi2_denom = 2.*S[:, None, None, :] / lengthscale2 + 1. 
# Nx1x1xQ _psi2_denom_sqrt = np.sqrt(_psi2_denom) @@ -284,28 +284,28 @@ except: _psi2_exponent = _psi2_exponent_max+np.log(np.exp(_psi2_exponent1-_psi2_exponent_max) + np.exp(_psi2_exponent2-_psi2_exponent_max)) _psi2_exp_sum = _psi2_exponent.sum(axis=-1) #NxM _psi2 = variance*variance * (np.exp(_psi2_exp_sum).sum(axis=0)) # MxM - + return _psi2 - + def psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior): ARD = (len(lengthscale)!=1) - + dvar_psi1, dl_psi1, dZ_psi1, dmu_psi1, dS_psi1, dgamma_psi1 = _psi1compDer(dL_dpsi1, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob) dvar_psi2, dl_psi2, dZ_psi2, dmu_psi2, dS_psi2, dgamma_psi2 = _psi2compDer(dL_dpsi2, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob) - + dL_dvar = np.sum(dL_dpsi0) + dvar_psi1 + dvar_psi2 - + dL_dlengscale = dl_psi1 + dl_psi2 if not ARD: dL_dlengscale = dL_dlengscale.sum() - + dL_dgamma = dgamma_psi1 + dgamma_psi2 dL_dmu = dmu_psi1 + dmu_psi2 dL_dS = dS_psi1 + dS_psi2 dL_dZ = dZ_psi1 + dZ_psi2 - + return dL_dvar, dL_dlengscale, dL_dZ, dL_dmu, dL_dS, dL_dgamma - + def _psi1compDer(dL_dpsi1, variance, lengthscale, Z, mu, S, gamma): """ dL_dpsi1 - NxM @@ -322,9 +322,9 @@ except: # _dL_dgamma NxQ # _dL_dmu NxQ # _dL_dS NxQ - + lengthscale2 = np.square(lengthscale) - + # psi1 _psi1_denom = S / lengthscale2 + 1. # NxQ _psi1_denom_sqrt = np.sqrt(_psi1_denom) #NxQ @@ -346,9 +346,9 @@ except: _dL_dS = np.einsum('nm,nmq,nmq,nq,nmq->nq',dL_dpsi1,_psi1_q,_psi1_exp_dist_sq,_psi1_common,(_psi1_dist_sq-1.))/2. # NxQ _dL_dZ = np.einsum('nm,nmq,nmq->mq',dL_dpsi1,_psi1_q, (- _psi1_common[:,None,:] * _psi1_dist * _psi1_exp_dist_sq - (1-gamma[:,None,:])/lengthscale2*Z[None,:,:]*_psi1_exp_Z)) _dL_dlengthscale = lengthscale* np.einsum('nm,nmq,nmq->q',dL_dpsi1,_psi1_q,(_psi1_common[:,None,:]*(S[:,None,:]/lengthscale2+_psi1_dist_sq)*_psi1_exp_dist_sq + (1-gamma[:,None,:])*np.square(Z[None,:,:]/lengthscale2)*_psi1_exp_Z)) - - return _dL_dvariance, _dL_dlengthscale, _dL_dZ, _dL_dmu, _dL_dS, _dL_dgamma - + + return _dL_dvariance, _dL_dlengthscale, _dL_dZ, _dL_dmu, _dL_dS, _dL_dgamma + def _psi2compDer(dL_dpsi2, variance, lengthscale, Z, mu, S, gamma): """ Z - MxQ @@ -365,14 +365,14 @@ except: # _dL_dgamma NxQ # _dL_dmu NxQ # _dL_dS NxQ - + lengthscale2 = np.square(lengthscale) - + _psi2_Zhat = 0.5 * (Z[:, None, :] + Z[None, :, :]) # M,M,Q _psi2_Zdist = 0.5 * (Z[:, None, :] - Z[None, :, :]) # M,M,Q _psi2_Zdist_sq = np.square(_psi2_Zdist / lengthscale) # M,M,Q _psi2_Z_sq_sum = (np.square(Z[:,None,:])+np.square(Z[None,:,:]))/lengthscale2 # MxMxQ - + # psi2 _psi2_denom = 2.*S / lengthscale2 + 1. 
# NxQ _psi2_denom_sqrt = np.sqrt(_psi2_denom) @@ -384,7 +384,7 @@ except: _psi2_exponent_max = np.maximum(_psi2_exponent1, _psi2_exponent2) _psi2_exponent = _psi2_exponent_max+np.log(np.exp(_psi2_exponent1-_psi2_exponent_max) + np.exp(_psi2_exponent2-_psi2_exponent_max)) _psi2_exp_sum = _psi2_exponent.sum(axis=-1) #NxM - _psi2_q = variance*variance * np.exp(_psi2_exp_sum[:,:,:,None]-_psi2_exponent) # NxMxMxQ + _psi2_q = variance*variance * np.exp(_psi2_exp_sum[:,:,:,None]-_psi2_exponent) # NxMxMxQ _psi2_exp_dist_sq = np.exp(-_psi2_Zdist_sq -_psi2_mudist_sq) # NxMxMxQ _psi2_exp_Z = np.exp(-0.5*_psi2_Z_sq_sum) # MxMxQ _psi2 = variance*variance * (np.exp(_psi2_exp_sum).sum(axis=0)) # MxM @@ -394,5 +394,5 @@ except: _dL_dS = np.einsum('mo,nmoq,nq,nmoq,nmoq->nq',dL_dpsi2,_psi2_q, _psi2_common, (2.*_psi2_mudist_sq-1.), _psi2_exp_dist_sq) _dL_dZ = 2.*np.einsum('mo,nmoq,nmoq->mq',dL_dpsi2,_psi2_q,(_psi2_common[:,None,None,:]*(-_psi2_Zdist*_psi2_denom[:,None,None,:]+_psi2_mudist)*_psi2_exp_dist_sq - (1-gamma[:,None,None,:])*Z[:,None,:]/lengthscale2*_psi2_exp_Z)) _dL_dlengthscale = 2.*lengthscale* np.einsum('mo,nmoq,nmoq->q',dL_dpsi2,_psi2_q,(_psi2_common[:,None,None,:]*(S[:,None,None,:]/lengthscale2+_psi2_Zdist_sq*_psi2_denom[:,None,None,:]+_psi2_mudist_sq)*_psi2_exp_dist_sq+(1-gamma[:,None,None,:])*_psi2_Z_sq_sum*0.5/lengthscale2*_psi2_exp_Z)) - + return _dL_dvariance, _dL_dlengthscale, _dL_dZ, _dL_dmu, _dL_dS, _dL_dgamma diff --git a/GPy/kern/_src/rbf.py b/GPy/kern/_src/rbf.py index 73f2d0a4..357c48ec 100644 --- a/GPy/kern/_src/rbf.py +++ b/GPy/kern/_src/rbf.py @@ -56,16 +56,19 @@ class RBF(Stationary): return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior)[1] def psi2(self, Z, variational_posterior): - return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior)[2] + return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior, return_psi2_n=self.return_psi2_n)[2] - def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): - dL_dvar, dL_dlengscale = self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)[:2] + def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior, + psi0=None, psi1=None, psi2=None): + dL_dvar, dL_dlengscale = self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior, psi0=psi0, psi1=psi1, psi2=psi2)[:2] self.variance.gradient = dL_dvar self.lengthscale.gradient = dL_dlengscale - def gradients_Z_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): - return self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)[2] + def gradients_Z_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior, + psi0=None, psi1=None, psi2=None): + return self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior, psi0=psi0, psi1=psi1, psi2=psi2)[2] - def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): - return self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)[3:] + def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior, + psi0=None, psi1=None, 
psi2=None): + return self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior, psi0=psi0, psi1=psi1, psi2=psi2)[3:] diff --git a/GPy/models/bayesian_gplvm_minibatch.py b/GPy/models/bayesian_gplvm_minibatch.py index 7a7bb0e8..47c425a4 100644 --- a/GPy/models/bayesian_gplvm_minibatch.py +++ b/GPy/models/bayesian_gplvm_minibatch.py @@ -80,45 +80,9 @@ class BayesianGPLVMMiniBatch(SparseGPMiniBatch): """Get the gradients of the posterior distribution of X in its specific form.""" return X.mean.gradient, X.variance.gradient - def _inner_parameters_changed(self, kern, X, Z, likelihood, Y, Y_metadata, Lm=None, dL_dKmm=None, subset_indices=None, **kw): - posterior, log_marginal_likelihood, grad_dict, current_values, value_indices = super(BayesianGPLVMMiniBatch, self)._inner_parameters_changed(kern, X, Z, likelihood, Y, Y_metadata, Lm=Lm, dL_dKmm=dL_dKmm, subset_indices=subset_indices, **kw) - - if self.has_uncertain_inputs(): - current_values['meangrad'], current_values['vargrad'] = self.kern.gradients_qX_expectations( - variational_posterior=X, - Z=Z, dL_dpsi0=grad_dict['dL_dpsi0'], - dL_dpsi1=grad_dict['dL_dpsi1'], - dL_dpsi2=grad_dict['dL_dpsi2']) - else: - current_values['Xgrad'] = self.kern.gradients_X(grad_dict['dL_dKnm'], X, Z) - current_values['Xgrad'] += self.kern.gradients_X_diag(grad_dict['dL_dKdiag'], X) - if subset_indices is not None: - value_indices['Xgrad'] = subset_indices['samples'] - - kl_fctr = self.kl_factr - if self.has_uncertain_inputs(): - if self.missing_data: - d = self.output_dim - log_marginal_likelihood -= kl_fctr*self.variational_prior.KL_divergence(X)/d - else: - log_marginal_likelihood -= kl_fctr*self.variational_prior.KL_divergence(X) - - # Subsetting Variational Posterior objects, makes the gradients - # empty. We need them to be 0 though: - X.mean.gradient[:] = 0 - X.variance.gradient[:] = 0 - - self.variational_prior.update_gradients_KL(X) - if self.missing_data: - current_values['meangrad'] += kl_fctr*X.mean.gradient/d - current_values['vargrad'] += kl_fctr*X.variance.gradient/d - else: - current_values['meangrad'] += kl_fctr*X.mean.gradient - current_values['vargrad'] += kl_fctr*X.variance.gradient - - if subset_indices is not None: - value_indices['meangrad'] = subset_indices['samples'] - value_indices['vargrad'] = subset_indices['samples'] + def _inner_parameters_changed(self, kern, X, Z, likelihood, Y, Y_metadata, Lm=None, dL_dKmm=None, subset_indices=None, psi0=None, psi1=None, psi2=None, **kw): + posterior, log_marginal_likelihood, grad_dict, current_values, value_indices = super(BayesianGPLVMMiniBatch, self)._inner_parameters_changed(kern, X, Z, likelihood, Y, Y_metadata, Lm=Lm, dL_dKmm=dL_dKmm, + psi0=psi0, psi1=psi1, psi2=psi2, subset_indices=subset_indices, **kw) return posterior, log_marginal_likelihood, grad_dict, current_values, value_indices def _outer_values_update(self, full_values): @@ -127,6 +91,47 @@ class BayesianGPLVMMiniBatch(SparseGPMiniBatch): E.g. set the gradients of parameters, etc. 
""" super(BayesianGPLVMMiniBatch, self)._outer_values_update(full_values) + current_values = full_values + grad_dict = current_values + if self.has_uncertain_inputs(): + current_values['meangrad'], current_values['vargrad'] = self.kern.gradients_qX_expectations( + variational_posterior=self.X, + Z=self.Z, dL_dpsi0=grad_dict['dL_dpsi0'], + dL_dpsi1=grad_dict['dL_dpsi1'], + dL_dpsi2=grad_dict['dL_dpsi2'], + psi0=self.psi0, psi1=self.psi1, psi2=self.psi2) + else: + current_values['Xgrad'] = self.kern.gradients_X(grad_dict['dL_dKnm'], self.X, self.Z) + current_values['Xgrad'] += self.kern.gradients_X_diag(grad_dict['dL_dKdiag'], self.X) + if subset_indices is not None: + value_indices['Xgrad'] = subset_indices['samples'] + + kl_fctr = self.kl_factr + if self.has_uncertain_inputs(): + #if self.missing_data: + #d = self.output_dim + #log_marginal_likelihood -= kl_fctr*self.variational_prior.KL_divergence(self.X)/d + #else: + self._log_marginal_likelihood -= kl_fctr*self.variational_prior.KL_divergence(self.X) + + # Subsetting Variational Posterior objects, makes the gradients + # empty. We need them to be 0 though: + self.X.mean.gradient[:] = 0 + self.X.variance.gradient[:] = 0 + + self.variational_prior.update_gradients_KL(self.X) + #if self.missing_data: + #current_values['meangrad'] += kl_fctr*self.X.mean.gradient/d + #current_values['vargrad'] += kl_fctr*self.X.variance.gradient/d + #else: + current_values['meangrad'] += kl_fctr*self.X.mean.gradient + current_values['vargrad'] += kl_fctr*self.X.variance.gradient + + #if subset_indices is not None: + #value_indices['meangrad'] = subset_indices['samples'] + #value_indices['vargrad'] = subset_indices['samples'] + + full_values = current_values if self.has_uncertain_inputs(): self.X.mean.gradient = full_values['meangrad'] self.X.variance.gradient = full_values['vargrad'] @@ -134,11 +139,13 @@ class BayesianGPLVMMiniBatch(SparseGPMiniBatch): self.X.gradient = full_values['Xgrad'] def _outer_init_full_values(self): - if self.has_uncertain_inputs(): - return dict(meangrad=np.zeros(self.X.mean.shape), - vargrad=np.zeros(self.X.variance.shape)) - else: - return dict(Xgrad=np.zeros(self.X.shape)) + full_values = super(BayesianGPLVMMiniBatch, self)._outer_init_full_values() + #if self.has_uncertain_inputs(): + #return dict(meangrad=np.zeros(self.X.mean.shape), + #vargrad=np.zeros(self.X.variance.shape)) + #else: + #return dict(Xgrad=np.zeros(self.X.shape)) + return full_values def parameters_changed(self): super(BayesianGPLVMMiniBatch,self).parameters_changed() diff --git a/GPy/models/sparse_gp_minibatch.py b/GPy/models/sparse_gp_minibatch.py index 07295255..f8322b4e 100644 --- a/GPy/models/sparse_gp_minibatch.py +++ b/GPy/models/sparse_gp_minibatch.py @@ -44,7 +44,7 @@ class SparseGPMiniBatch(SparseGP): def __init__(self, X, Y, Z, kernel, likelihood, inference_method=None, name='sparse gp', Y_metadata=None, normalizer=False, missing_data=False, stochastic=False, batchsize=1): - + # pick a sensible inference method if inference_method is None: if isinstance(likelihood, likelihoods.Gaussian): @@ -80,7 +80,7 @@ class SparseGPMiniBatch(SparseGP): def has_uncertain_inputs(self): return isinstance(self.X, VariationalPosterior) - def _inner_parameters_changed(self, kern, X, Z, likelihood, Y, Y_metadata, Lm=None, dL_dKmm=None, subset_indices=None, **kwargs): + def _inner_parameters_changed(self, kern, X, Z, likelihood, Y, Y_metadata, Lm=None, dL_dKmm=None, subset_indices=None, psi0=None, psi1=None, psi2=None, missing_inds=None, **kwargs): """ This is the 
standard part, which usually belongs in parameters_changed. @@ -99,46 +99,22 @@ class SparseGPMiniBatch(SparseGP): like them into this dictionary for inner use of the indices inside the algorithm. """ - try: - posterior, log_marginal_likelihood, grad_dict = self.inference_method.inference(kern, X, Z, likelihood, Y, Y_metadata, Lm=Lm, dL_dKmm=None, **kwargs) - except: - posterior, log_marginal_likelihood, grad_dict = self.inference_method.inference(kern, X, Z, likelihood, Y, Y_metadata) + if psi2 is None: + psi2_sum_n = None + else: + psi2_sum_n = psi2.sum(axis=0) + posterior, log_marginal_likelihood, grad_dict = self.inference_method.inference(kern, X, Z, likelihood, Y, Y_metadata, Lm=Lm, dL_dKmm=None, psi0=psi0, psi1=psi1, psi2=psi2_sum_n, **kwargs) current_values = {} likelihood.update_gradients(grad_dict['dL_dthetaL']) current_values['likgrad'] = likelihood.gradient.copy() if subset_indices is None: subset_indices = {} - if isinstance(X, VariationalPosterior): - #gradients wrt kernel - dL_dKmm = grad_dict['dL_dKmm'] - kern.update_gradients_full(dL_dKmm, Z, None) - current_values['kerngrad'] = kern.gradient.copy() - kern.update_gradients_expectations(variational_posterior=X, - Z=Z, - dL_dpsi0=grad_dict['dL_dpsi0'], - dL_dpsi1=grad_dict['dL_dpsi1'], - dL_dpsi2=grad_dict['dL_dpsi2']) - current_values['kerngrad'] += kern.gradient + current_values['dL_dpsi0'] = grad_dict['dL_dpsi0'] + current_values['dL_dpsi1'] = grad_dict['dL_dpsi1'] + current_values['dL_dpsi2'] = grad_dict['dL_dpsi2'] + current_values['dL_dKmm'] = grad_dict['dL_dKmm'] - #gradients wrt Z - current_values['Zgrad'] = kern.gradients_X(dL_dKmm, Z) - current_values['Zgrad'] += kern.gradients_Z_expectations( - grad_dict['dL_dpsi0'], - grad_dict['dL_dpsi1'], - grad_dict['dL_dpsi2'], - Z=Z, - variational_posterior=X) - else: - #gradients wrt kernel - kern.update_gradients_diag(grad_dict['dL_dKdiag'], X) - current_values['kerngrad'] = kern.gradient.copy() - kern.update_gradients_full(grad_dict['dL_dKnm'], X, Z) - current_values['kerngrad'] += kern.gradient - kern.update_gradients_full(grad_dict['dL_dKmm'], Z, None) - current_values['kerngrad'] += kern.gradient - #gradients wrt Z - current_values['Zgrad'] = kern.gradients_X(grad_dict['dL_dKmm'], Z) - current_values['Zgrad'] += kern.gradients_X(grad_dict['dL_dKnm'].T, Z, X) + #current_values = grad_dict return posterior, log_marginal_likelihood, grad_dict, current_values, subset_indices def _inner_take_over_or_update(self, full_values=None, current_values=None, value_indices=None): @@ -192,9 +168,43 @@ class SparseGPMiniBatch(SparseGP): Here you put the values, which were collected before in the right places. E.g. set the gradients of parameters, etc. 
""" - self.likelihood.gradient = full_values['likgrad'] - self.kern.gradient = full_values['kerngrad'] - self.Z.gradient = full_values['Zgrad'] + grad_dict = full_values + current_values = full_values + #current_values = {} + if isinstance(self.X, VariationalPosterior): + #gradients wrt kernel + dL_dKmm = grad_dict['dL_dKmm'] + self.kern.update_gradients_full(dL_dKmm, self.Z, None) + current_values['kerngrad'] = self.kern.gradient.copy() + self.kern.update_gradients_expectations(variational_posterior=self.X, + Z=self.Z, + dL_dpsi0=grad_dict['dL_dpsi0'], + dL_dpsi1=grad_dict['dL_dpsi1'], + dL_dpsi2=grad_dict['dL_dpsi2']) + current_values['kerngrad'] += self.kern.gradient + + #gradients wrt Z + current_values['Zgrad'] = self.kern.gradients_X(dL_dKmm, self.Z) + current_values['Zgrad'] += self.kern.gradients_Z_expectations( + grad_dict['dL_dpsi0'], + grad_dict['dL_dpsi1'], + grad_dict['dL_dpsi2'], + Z=self.Z, + variational_posterior=self.X) + else: + #gradients wrt kernel + kern.update_gradients_diag(grad_dict['dL_dKdiag'], self.X) + current_values['kerngrad'] = self.kern.gradient.copy() + kern.update_gradients_full(grad_dict['dL_dKnm'], self.X, self.Z) + current_values['kerngrad'] += kern.gradient + kern.update_gradients_full(grad_dict['dL_dKmm'], self.Z, None) + current_values['kerngrad'] += kern.gradient + #gradients wrt Z + current_values['Zgrad'] = kern.gradients_X(grad_dict['dL_dKmm'], self.Z) + current_values['Zgrad'] += kern.gradients_X(grad_dict['dL_dKnm'].T, self.Z, self.X) + self.likelihood.gradient = current_values['likgrad'] + self.kern.gradient = current_values['kerngrad'] + self.Z.gradient = current_values['Zgrad'] def _outer_init_full_values(self): """ @@ -209,7 +219,8 @@ class SparseGPMiniBatch(SparseGP): to initialize the gradients for the mean and the variance in order to have the full gradient for indexing) """ - return {} + return {'dL_dpsi0': np.zeros(self.X.shape[0]), + 'dL_dpsi1': np.zeros((self.X.shape[0], self.Z.shape[0]))} def _outer_loop_for_missing_data(self): Lm = None @@ -230,20 +241,35 @@ class SparseGPMiniBatch(SparseGP): message = m_f(-1) print(message, end=' ') - for d, ninan in self.stochastics.d: + #Compute the psi statistics for N once, but don't sum out N in psi2 + self.kern.return_psi2_n = True + psi0 = self.kern.psi0(self.Z, self.X) + psi1 = self.kern.psi1(self.Z, self.X) + psi2 = self.kern.psi2(self.Z, self.X) + self.psi0 = psi0 + self.psi1 = psi1 + self.psi2 = psi2 + for d, ninan in self.stochastics.d: if not self.stochastics: print(' '*(len(message)) + '\r', end=' ') message = m_f(d) print(message, end=' ') + psi0ni = psi0[ninan] + psi1ni = psi1[ninan] + psi2ni = psi2[ninan] + posterior, log_marginal_likelihood, \ grad_dict, current_values, value_indices = self._inner_parameters_changed( self.kern, self.X[ninan], self.Z, self.likelihood, self.Y_normalized[ninan][:, d], self.Y_metadata, Lm, dL_dKmm, - subset_indices=dict(outputs=d, samples=ninan)) + subset_indices=dict(outputs=d, samples=ninan, + dL_dpsi0=ninan, + dL_dpsi1=ninan), + psi0=psi0ni, psi1=psi1ni, psi2=psi2ni) self._inner_take_over_or_update(self.full_values, current_values, value_indices) self._inner_values_update(current_values) @@ -253,6 +279,7 @@ class SparseGPMiniBatch(SparseGP): woodbury_inv[:, :, d] = posterior.woodbury_inv[:,:,None] woodbury_vector[:, d] = posterior.woodbury_vector self._log_marginal_likelihood += log_marginal_likelihood + if not self.stochastics: print('') From 5f119d09185ae64bd5474935ceffca644d598881 Mon Sep 17 00:00:00 2001 From: Alan Saul Date: Thu, 27 Aug 2015 
19:08:11 +0300 Subject: [PATCH 03/13] Tidied but broken --- GPy/models/bayesian_gplvm_minibatch.py | 46 +++------- GPy/models/sparse_gp_minibatch.py | 115 ++++++++++++------------- 2 files changed, 69 insertions(+), 92 deletions(-) diff --git a/GPy/models/bayesian_gplvm_minibatch.py b/GPy/models/bayesian_gplvm_minibatch.py index 47c425a4..b7e0c9b2 100644 --- a/GPy/models/bayesian_gplvm_minibatch.py +++ b/GPy/models/bayesian_gplvm_minibatch.py @@ -80,10 +80,10 @@ class BayesianGPLVMMiniBatch(SparseGPMiniBatch): """Get the gradients of the posterior distribution of X in its specific form.""" return X.mean.gradient, X.variance.gradient - def _inner_parameters_changed(self, kern, X, Z, likelihood, Y, Y_metadata, Lm=None, dL_dKmm=None, subset_indices=None, psi0=None, psi1=None, psi2=None, **kw): - posterior, log_marginal_likelihood, grad_dict, current_values, value_indices = super(BayesianGPLVMMiniBatch, self)._inner_parameters_changed(kern, X, Z, likelihood, Y, Y_metadata, Lm=Lm, dL_dKmm=dL_dKmm, - psi0=psi0, psi1=psi1, psi2=psi2, subset_indices=subset_indices, **kw) - return posterior, log_marginal_likelihood, grad_dict, current_values, value_indices + def _inner_parameters_changed(self, kern, X, Z, likelihood, Y, Y_metadata, Lm=None, dL_dKmm=None,psi0=None, psi1=None, psi2=None, **kw): + posterior, log_marginal_likelihood, grad_dict = super(BayesianGPLVMMiniBatch, self)._inner_parameters_changed(kern, X, Z, likelihood, Y, Y_metadata, Lm=Lm, dL_dKmm=dL_dKmm, + psi0=psi0, psi1=psi1, psi2=psi2, **kw) + return posterior, log_marginal_likelihood, grad_dict def _outer_values_update(self, full_values): """ @@ -91,27 +91,19 @@ class BayesianGPLVMMiniBatch(SparseGPMiniBatch): E.g. set the gradients of parameters, etc. """ super(BayesianGPLVMMiniBatch, self)._outer_values_update(full_values) - current_values = full_values - grad_dict = current_values if self.has_uncertain_inputs(): - current_values['meangrad'], current_values['vargrad'] = self.kern.gradients_qX_expectations( + full_values['meangrad'], full_values['vargrad'] = self.kern.gradients_qX_expectations( variational_posterior=self.X, - Z=self.Z, dL_dpsi0=grad_dict['dL_dpsi0'], - dL_dpsi1=grad_dict['dL_dpsi1'], - dL_dpsi2=grad_dict['dL_dpsi2'], + Z=self.Z, dL_dpsi0=full_values['dL_dpsi0'], + dL_dpsi1=full_values['dL_dpsi1'], + dL_dpsi2=full_values['dL_dpsi2'], psi0=self.psi0, psi1=self.psi1, psi2=self.psi2) else: - current_values['Xgrad'] = self.kern.gradients_X(grad_dict['dL_dKnm'], self.X, self.Z) - current_values['Xgrad'] += self.kern.gradients_X_diag(grad_dict['dL_dKdiag'], self.X) - if subset_indices is not None: - value_indices['Xgrad'] = subset_indices['samples'] + full_values['Xgrad'] = self.kern.gradients_X(full_values['dL_dKnm'], self.X, self.Z) + full_values['Xgrad'] += self.kern.gradients_X_diag(full_values['dL_dKdiag'], self.X) kl_fctr = self.kl_factr if self.has_uncertain_inputs(): - #if self.missing_data: - #d = self.output_dim - #log_marginal_likelihood -= kl_fctr*self.variational_prior.KL_divergence(self.X)/d - #else: self._log_marginal_likelihood -= kl_fctr*self.variational_prior.KL_divergence(self.X) # Subsetting Variational Posterior objects, makes the gradients @@ -120,18 +112,9 @@ class BayesianGPLVMMiniBatch(SparseGPMiniBatch): self.X.variance.gradient[:] = 0 self.variational_prior.update_gradients_KL(self.X) - #if self.missing_data: - #current_values['meangrad'] += kl_fctr*self.X.mean.gradient/d - #current_values['vargrad'] += kl_fctr*self.X.variance.gradient/d - #else: - current_values['meangrad'] += 
kl_fctr*self.X.mean.gradient - current_values['vargrad'] += kl_fctr*self.X.variance.gradient + full_values['meangrad'] += kl_fctr*self.X.mean.gradient + full_values['vargrad'] += kl_fctr*self.X.variance.gradient - #if subset_indices is not None: - #value_indices['meangrad'] = subset_indices['samples'] - #value_indices['vargrad'] = subset_indices['samples'] - - full_values = current_values if self.has_uncertain_inputs(): self.X.mean.gradient = full_values['meangrad'] self.X.variance.gradient = full_values['vargrad'] @@ -140,11 +123,6 @@ class BayesianGPLVMMiniBatch(SparseGPMiniBatch): def _outer_init_full_values(self): full_values = super(BayesianGPLVMMiniBatch, self)._outer_init_full_values() - #if self.has_uncertain_inputs(): - #return dict(meangrad=np.zeros(self.X.mean.shape), - #vargrad=np.zeros(self.X.variance.shape)) - #else: - #return dict(Xgrad=np.zeros(self.X.shape)) return full_values def parameters_changed(self): diff --git a/GPy/models/sparse_gp_minibatch.py b/GPy/models/sparse_gp_minibatch.py index f8322b4e..eab8d8f3 100644 --- a/GPy/models/sparse_gp_minibatch.py +++ b/GPy/models/sparse_gp_minibatch.py @@ -80,7 +80,7 @@ class SparseGPMiniBatch(SparseGP): def has_uncertain_inputs(self): return isinstance(self.X, VariationalPosterior) - def _inner_parameters_changed(self, kern, X, Z, likelihood, Y, Y_metadata, Lm=None, dL_dKmm=None, subset_indices=None, psi0=None, psi1=None, psi2=None, missing_inds=None, **kwargs): + def _inner_parameters_changed(self, kern, X, Z, likelihood, Y, Y_metadata, Lm=None, dL_dKmm=None, psi0=None, psi1=None, psi2=None, **kwargs): """ This is the standard part, which usually belongs in parameters_changed. @@ -103,19 +103,10 @@ class SparseGPMiniBatch(SparseGP): psi2_sum_n = None else: psi2_sum_n = psi2.sum(axis=0) - posterior, log_marginal_likelihood, grad_dict = self.inference_method.inference(kern, X, Z, likelihood, Y, Y_metadata, Lm=Lm, dL_dKmm=None, psi0=psi0, psi1=psi1, psi2=psi2_sum_n, **kwargs) - current_values = {} - likelihood.update_gradients(grad_dict['dL_dthetaL']) - current_values['likgrad'] = likelihood.gradient.copy() - if subset_indices is None: - subset_indices = {} - current_values['dL_dpsi0'] = grad_dict['dL_dpsi0'] - current_values['dL_dpsi1'] = grad_dict['dL_dpsi1'] - current_values['dL_dpsi2'] = grad_dict['dL_dpsi2'] - current_values['dL_dKmm'] = grad_dict['dL_dKmm'] + posterior, log_marginal_likelihood, grad_dict = self.inference_method.inference(kern, X, Z, likelihood, Y, Y_metadata, Lm=Lm, + dL_dKmm=dL_dKmm, psi0=psi0, psi1=psi1, psi2=psi2_sum_n, **kwargs) - #current_values = grad_dict - return posterior, log_marginal_likelihood, grad_dict, current_values, subset_indices + return posterior, log_marginal_likelihood, grad_dict def _inner_take_over_or_update(self, full_values=None, current_values=None, value_indices=None): """ @@ -149,7 +140,10 @@ class SparseGPMiniBatch(SparseGP): else: index = slice(None) if key in full_values: - full_values[key][index] += current_values[key] + if np.isscalar(current_values[key]): + full_values[key] += current_values[key] + else: + full_values[key][index] += current_values[key] else: full_values[key] = current_values[key] @@ -168,43 +162,44 @@ class SparseGPMiniBatch(SparseGP): Here you put the values, which were collected before in the right places. E.g. set the gradients of parameters, etc. 
""" - grad_dict = full_values - current_values = full_values - #current_values = {} - if isinstance(self.X, VariationalPosterior): + if self.has_uncertain_inputs(): #gradients wrt kernel - dL_dKmm = grad_dict['dL_dKmm'] + dL_dKmm = full_values['dL_dKmm'] self.kern.update_gradients_full(dL_dKmm, self.Z, None) - current_values['kerngrad'] = self.kern.gradient.copy() + full_values['kerngrad'] = self.kern.gradient.copy() self.kern.update_gradients_expectations(variational_posterior=self.X, Z=self.Z, - dL_dpsi0=grad_dict['dL_dpsi0'], - dL_dpsi1=grad_dict['dL_dpsi1'], - dL_dpsi2=grad_dict['dL_dpsi2']) - current_values['kerngrad'] += self.kern.gradient + dL_dpsi0=full_values['dL_dpsi0'], + dL_dpsi1=full_values['dL_dpsi1'], + dL_dpsi2=full_values['dL_dpsi2']) + full_values['kerngrad'] += self.kern.gradient #gradients wrt Z - current_values['Zgrad'] = self.kern.gradients_X(dL_dKmm, self.Z) - current_values['Zgrad'] += self.kern.gradients_Z_expectations( - grad_dict['dL_dpsi0'], - grad_dict['dL_dpsi1'], - grad_dict['dL_dpsi2'], + full_values['Zgrad'] = self.kern.gradients_X(dL_dKmm, self.Z) + full_values['Zgrad'] += self.kern.gradients_Z_expectations( + full_values['dL_dpsi0'], + full_values['dL_dpsi1'], + full_values['dL_dpsi2'], Z=self.Z, variational_posterior=self.X) else: #gradients wrt kernel - kern.update_gradients_diag(grad_dict['dL_dKdiag'], self.X) - current_values['kerngrad'] = self.kern.gradient.copy() - kern.update_gradients_full(grad_dict['dL_dKnm'], self.X, self.Z) - current_values['kerngrad'] += kern.gradient - kern.update_gradients_full(grad_dict['dL_dKmm'], self.Z, None) - current_values['kerngrad'] += kern.gradient + self.kern.update_gradients_diag(full_values['dL_dKdiag'], self.X) + full_values['kerngrad'] = self.kern.gradient.copy() + self.kern.update_gradients_full(full_values['dL_dKnm'], self.X, self.Z) + full_values['kerngrad'] += self.kern.gradient + self.kern.update_gradients_full(full_values['dL_dKmm'], self.Z, None) + full_values['kerngrad'] += self.kern.gradient #gradients wrt Z - current_values['Zgrad'] = kern.gradients_X(grad_dict['dL_dKmm'], self.Z) - current_values['Zgrad'] += kern.gradients_X(grad_dict['dL_dKnm'].T, self.Z, self.X) - self.likelihood.gradient = current_values['likgrad'] - self.kern.gradient = current_values['kerngrad'] - self.Z.gradient = current_values['Zgrad'] + full_values['Zgrad'] = self.kern.gradients_X(full_values['dL_dKmm'], self.Z) + full_values['Zgrad'] += self.kern.gradients_X(full_values['dL_dKnm'].T, self.Z, self.X) + + self.likelihood.update_gradients(full_values['dL_dthetaL']) + full_values['likgrad'] = self.likelihood.gradient.copy() + + self.likelihood.gradient = full_values['likgrad'] + self.kern.gradient = full_values['kerngrad'] + self.Z.gradient = full_values['Zgrad'] def _outer_init_full_values(self): """ @@ -242,10 +237,18 @@ class SparseGPMiniBatch(SparseGP): print(message, end=' ') #Compute the psi statistics for N once, but don't sum out N in psi2 - self.kern.return_psi2_n = True - psi0 = self.kern.psi0(self.Z, self.X) - psi1 = self.kern.psi1(self.Z, self.X) - psi2 = self.kern.psi2(self.Z, self.X) + if self.has_uncertain_inputs(): + self.kern.return_psi2_n = True + psi0 = self.kern.psi0(self.Z, self.X) + psi1 = self.kern.psi1(self.Z, self.X) + psi2 = self.kern.psi2(self.Z, self.X) + else: + if psi0 is None: + psi0 = kern.Kdiag(X) + if psi1 is None: + psi1 = kern.K(X, Z) + psi2 = None + self.psi0 = psi0 self.psi1 = psi1 self.psi2 = psi2 @@ -259,23 +262,20 @@ class SparseGPMiniBatch(SparseGP): psi0ni = psi0[ninan] psi1ni = 
psi1[ninan] psi2ni = psi2[ninan] + value_indices = dict(outputs=d, samples=ninan, dL_dpsi0=ninan, dL_dpsi1=ninan) - posterior, log_marginal_likelihood, \ - grad_dict, current_values, value_indices = self._inner_parameters_changed( + posterior, log_marginal_likelihood, grad_dict = self._inner_parameters_changed( self.kern, self.X[ninan], self.Z, self.likelihood, self.Y_normalized[ninan][:, d], self.Y_metadata, Lm, dL_dKmm, - subset_indices=dict(outputs=d, samples=ninan, - dL_dpsi0=ninan, - dL_dpsi1=ninan), psi0=psi0ni, psi1=psi1ni, psi2=psi2ni) - self._inner_take_over_or_update(self.full_values, current_values, value_indices) - self._inner_values_update(current_values) + # Fill out the full values by adding in the apporpriate grad_dict + # values + self._inner_take_over_or_update(self.full_values, grad_dict, value_indices) + self._inner_values_update(grad_dict) # What is this for? - Lm = posterior.K_chol - dL_dKmm = grad_dict['dL_dKmm'] woodbury_inv[:, :, d] = posterior.woodbury_inv[:,:,None] woodbury_vector[:, d] = posterior.woodbury_vector self._log_marginal_likelihood += log_marginal_likelihood @@ -299,8 +299,7 @@ class SparseGPMiniBatch(SparseGP): woodbury_vector = self.posterior._woodbury_vector d = self.stochastics.d - posterior, log_marginal_likelihood, \ - grad_dict, self.full_values, _ = self._inner_parameters_changed( + posterior, log_marginal_likelihood, grad_dict= self._inner_parameters_changed( self.kern, self.X, self.Z, self.likelihood, self.Y_normalized[:, d], self.Y_metadata) @@ -308,7 +307,7 @@ class SparseGPMiniBatch(SparseGP): self._log_marginal_likelihood += log_marginal_likelihood - self._outer_values_update(self.full_values) + self._outer_values_update(self.grad_dict) woodbury_inv[:, :, d] = posterior.woodbury_inv[:, :, None] woodbury_vector[:, d] = posterior.woodbury_vector @@ -322,5 +321,5 @@ class SparseGPMiniBatch(SparseGP): elif self.stochastics: self._outer_loop_without_missing_data() else: - self.posterior, self._log_marginal_likelihood, self.grad_dict, self.full_values, _ = self._inner_parameters_changed(self.kern, self.X, self.Z, self.likelihood, self.Y_normalized, self.Y_metadata) - self._outer_values_update(self.full_values) + self.posterior, self._log_marginal_likelihood, self.grad_dict = self._inner_parameters_changed(self.kern, self.X, self.Z, self.likelihood, self.Y_normalized, self.Y_metadata) + self._outer_values_update(self.grad_dict) From afba8d5c5e4b8742f5f28af4963bab021810f4fa Mon Sep 17 00:00:00 2001 From: Alan Saul Date: Fri, 28 Aug 2015 10:07:19 +0300 Subject: [PATCH 04/13] Made sparse gp work again --- GPy/models/bayesian_gplvm_minibatch.py | 2 ++ GPy/models/sparse_gp_minibatch.py | 18 ++++++++++-------- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/GPy/models/bayesian_gplvm_minibatch.py b/GPy/models/bayesian_gplvm_minibatch.py index b7e0c9b2..5058f17b 100644 --- a/GPy/models/bayesian_gplvm_minibatch.py +++ b/GPy/models/bayesian_gplvm_minibatch.py @@ -123,6 +123,8 @@ class BayesianGPLVMMiniBatch(SparseGPMiniBatch): def _outer_init_full_values(self): full_values = super(BayesianGPLVMMiniBatch, self)._outer_init_full_values() + full_values['dL_dpsi0'] = np.zeros(self.X.shape[0]) + full_values['dL_dpsi1'] = np.zeros((self.X.shape[0], self.Z.shape[0])) return full_values def parameters_changed(self): diff --git a/GPy/models/sparse_gp_minibatch.py b/GPy/models/sparse_gp_minibatch.py index eab8d8f3..3065b260 100644 --- a/GPy/models/sparse_gp_minibatch.py +++ b/GPy/models/sparse_gp_minibatch.py @@ -214,8 +214,8 @@ class 
SparseGPMiniBatch(SparseGP): to initialize the gradients for the mean and the variance in order to have the full gradient for indexing) """ - return {'dL_dpsi0': np.zeros(self.X.shape[0]), - 'dL_dpsi1': np.zeros((self.X.shape[0], self.Z.shape[0]))} + return {'dL_dKdiag': np.zeros(self.X.shape[0]), + 'dL_dKnm': np.zeros((self.X.shape[0], self.Z.shape[0]))} def _outer_loop_for_missing_data(self): Lm = None @@ -243,10 +243,8 @@ class SparseGPMiniBatch(SparseGP): psi1 = self.kern.psi1(self.Z, self.X) psi2 = self.kern.psi2(self.Z, self.X) else: - if psi0 is None: - psi0 = kern.Kdiag(X) - if psi1 is None: - psi1 = kern.K(X, Z) + psi0 = self.kern.Kdiag(self.X) + psi1 = self.kern.K(self.X, self.Z) psi2 = None self.psi0 = psi0 @@ -261,8 +259,12 @@ class SparseGPMiniBatch(SparseGP): psi0ni = psi0[ninan] psi1ni = psi1[ninan] - psi2ni = psi2[ninan] - value_indices = dict(outputs=d, samples=ninan, dL_dpsi0=ninan, dL_dpsi1=ninan) + if self.has_uncertain_inputs(): + psi2ni = psi2[ninan] + value_indices = dict(outputs=d, samples=ninan, dL_dpsi0=ninan, dL_dpsi1=ninan) + else: + psi2ni = None + value_indices = dict(outputs=d, samples=ninan, dL_dKdiag=ninan, dL_dKnm=ninan) posterior, log_marginal_likelihood, grad_dict = self._inner_parameters_changed( self.kern, self.X[ninan], From e12575c0c6eb224d01c84414a25c87a00c971762 Mon Sep 17 00:00:00 2001 From: Alan Saul Date: Fri, 28 Aug 2015 11:47:10 +0300 Subject: [PATCH 05/13] Gradients for X seem to match --- GPy/models/bayesian_gplvm_minibatch.py | 49 +++++++++++++++++++++----- GPy/models/sparse_gp_minibatch.py | 2 +- 2 files changed, 41 insertions(+), 10 deletions(-) diff --git a/GPy/models/bayesian_gplvm_minibatch.py b/GPy/models/bayesian_gplvm_minibatch.py index 5058f17b..8b8c6feb 100644 --- a/GPy/models/bayesian_gplvm_minibatch.py +++ b/GPy/models/bayesian_gplvm_minibatch.py @@ -83,6 +83,35 @@ class BayesianGPLVMMiniBatch(SparseGPMiniBatch): def _inner_parameters_changed(self, kern, X, Z, likelihood, Y, Y_metadata, Lm=None, dL_dKmm=None,psi0=None, psi1=None, psi2=None, **kw): posterior, log_marginal_likelihood, grad_dict = super(BayesianGPLVMMiniBatch, self)._inner_parameters_changed(kern, X, Z, likelihood, Y, Y_metadata, Lm=Lm, dL_dKmm=dL_dKmm, psi0=psi0, psi1=psi1, psi2=psi2, **kw) + kl_fctr = self.kl_factr + if self.has_uncertain_inputs(): + if self.missing_data: + d = self.output_dim + log_marginal_likelihood -= kl_fctr*self.variational_prior.KL_divergence(X)/d + else: + log_marginal_likelihood -= kl_fctr*self.variational_prior.KL_divergence(X) + + X.mean.gradient[:] = 0 + X.variance.gradient[:] = 0 + + self.variational_prior.update_gradients_KL(X) + if self.missing_data: + grad_dict['meangrad'] = kl_fctr*X.mean.gradient/d + grad_dict['vargrad'] = kl_fctr*X.variance.gradient/d + else: + grad_dict['meangrad'] = kl_fctr*X.mean.gradient + grad_dict['vargrad'] = kl_fctr*X.variance.gradient + + #if subset_indices is not None: + #value_indices['meangrad'] = subset_indices['samples'] + #value_indices['vargrad'] = subset_indices['samples'] + + #grad_dict['meangrad'] = kl_fctr*self.X.mean.gradient + #grad_dict['vargrad'] = kl_fctr*self.X.variance.gradient + #self.X.mean.gradient[:] = 0 + #self.X.variance.gradient[:] = 0 + #import ipdb; ipdb.set_trace() # XXX BREAKPOINT + return posterior, log_marginal_likelihood, grad_dict def _outer_values_update(self, full_values): @@ -92,28 +121,28 @@ class BayesianGPLVMMiniBatch(SparseGPMiniBatch): """ super(BayesianGPLVMMiniBatch, self)._outer_values_update(full_values) if self.has_uncertain_inputs(): - 
full_values['meangrad'], full_values['vargrad'] = self.kern.gradients_qX_expectations( + meangrad_tmp, vargrad_tmp = self.kern.gradients_qX_expectations( variational_posterior=self.X, Z=self.Z, dL_dpsi0=full_values['dL_dpsi0'], dL_dpsi1=full_values['dL_dpsi1'], dL_dpsi2=full_values['dL_dpsi2'], psi0=self.psi0, psi1=self.psi1, psi2=self.psi2) + full_values['meangrad'] += meangrad_tmp + full_values['vargrad'] += vargrad_tmp else: full_values['Xgrad'] = self.kern.gradients_X(full_values['dL_dKnm'], self.X, self.Z) full_values['Xgrad'] += self.kern.gradients_X_diag(full_values['dL_dKdiag'], self.X) - kl_fctr = self.kl_factr - if self.has_uncertain_inputs(): - self._log_marginal_likelihood -= kl_fctr*self.variational_prior.KL_divergence(self.X) + #kl_fctr = self.kl_factr + #if self.has_uncertain_inputs(): + #self._log_marginal_likelihood -= kl_fctr*self.variational_prior.KL_divergence(self.X) # Subsetting Variational Posterior objects, makes the gradients # empty. We need them to be 0 though: - self.X.mean.gradient[:] = 0 - self.X.variance.gradient[:] = 0 + #self.X.mean.gradient[:] = 0 + #self.X.variance.gradient[:] = 0 - self.variational_prior.update_gradients_KL(self.X) - full_values['meangrad'] += kl_fctr*self.X.mean.gradient - full_values['vargrad'] += kl_fctr*self.X.variance.gradient + #self.variational_prior.update_gradients_KL(self.X) if self.has_uncertain_inputs(): self.X.mean.gradient = full_values['meangrad'] @@ -123,6 +152,8 @@ class BayesianGPLVMMiniBatch(SparseGPMiniBatch): def _outer_init_full_values(self): full_values = super(BayesianGPLVMMiniBatch, self)._outer_init_full_values() + full_values['meangrad'] = np.zeros((self.X.shape[0], self.X.shape[1])) + full_values['vargrad'] = np.zeros((self.X.shape[0], self.X.shape[1])) full_values['dL_dpsi0'] = np.zeros(self.X.shape[0]) full_values['dL_dpsi1'] = np.zeros((self.X.shape[0], self.Z.shape[0])) return full_values diff --git a/GPy/models/sparse_gp_minibatch.py b/GPy/models/sparse_gp_minibatch.py index 3065b260..a5676e9d 100644 --- a/GPy/models/sparse_gp_minibatch.py +++ b/GPy/models/sparse_gp_minibatch.py @@ -261,7 +261,7 @@ class SparseGPMiniBatch(SparseGP): psi1ni = psi1[ninan] if self.has_uncertain_inputs(): psi2ni = psi2[ninan] - value_indices = dict(outputs=d, samples=ninan, dL_dpsi0=ninan, dL_dpsi1=ninan) + value_indices = dict(outputs=d, samples=ninan, dL_dpsi0=ninan, dL_dpsi1=ninan, meangrad=ninan, vargrad=ninan) else: psi2ni = None value_indices = dict(outputs=d, samples=ninan, dL_dKdiag=ninan, dL_dKnm=ninan) From b89196c7cf25c4b4266133045762f3216ad998b6 Mon Sep 17 00:00:00 2001 From: Alan Saul Date: Fri, 28 Aug 2015 14:03:14 +0300 Subject: [PATCH 06/13] Checking kerngrads full_value --- GPy/models/bayesian_gplvm_minibatch.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/GPy/models/bayesian_gplvm_minibatch.py b/GPy/models/bayesian_gplvm_minibatch.py index 8b8c6feb..85b1e44f 100644 --- a/GPy/models/bayesian_gplvm_minibatch.py +++ b/GPy/models/bayesian_gplvm_minibatch.py @@ -120,6 +120,8 @@ class BayesianGPLVMMiniBatch(SparseGPMiniBatch): E.g. set the gradients of parameters, etc. 
""" super(BayesianGPLVMMiniBatch, self)._outer_values_update(full_values) + print full_values['kerngrad'] + import ipdb; ipdb.set_trace() # XXX BREAKPOINT if self.has_uncertain_inputs(): meangrad_tmp, vargrad_tmp = self.kern.gradients_qX_expectations( variational_posterior=self.X, From 1a2b194aa27cfe8169a2d716dc6b4c6b78dd020f Mon Sep 17 00:00:00 2001 From: Alan Saul Date: Fri, 28 Aug 2015 14:23:44 +0300 Subject: [PATCH 07/13] Removed comment --- GPy/models/bayesian_gplvm_minibatch.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/GPy/models/bayesian_gplvm_minibatch.py b/GPy/models/bayesian_gplvm_minibatch.py index 85b1e44f..8b8c6feb 100644 --- a/GPy/models/bayesian_gplvm_minibatch.py +++ b/GPy/models/bayesian_gplvm_minibatch.py @@ -120,8 +120,6 @@ class BayesianGPLVMMiniBatch(SparseGPMiniBatch): E.g. set the gradients of parameters, etc. """ super(BayesianGPLVMMiniBatch, self)._outer_values_update(full_values) - print full_values['kerngrad'] - import ipdb; ipdb.set_trace() # XXX BREAKPOINT if self.has_uncertain_inputs(): meangrad_tmp, vargrad_tmp = self.kern.gradients_qX_expectations( variational_posterior=self.X, From c83f56723ec7ee3e5725a8db941b0edcb045e043 Mon Sep 17 00:00:00 2001 From: Alan Saul Date: Sun, 30 Aug 2015 22:22:27 +0300 Subject: [PATCH 08/13] passing psi statistics --- GPy/models/sparse_gp_minibatch.py | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/GPy/models/sparse_gp_minibatch.py b/GPy/models/sparse_gp_minibatch.py index a5676e9d..c9d13e6b 100644 --- a/GPy/models/sparse_gp_minibatch.py +++ b/GPy/models/sparse_gp_minibatch.py @@ -167,21 +167,27 @@ class SparseGPMiniBatch(SparseGP): dL_dKmm = full_values['dL_dKmm'] self.kern.update_gradients_full(dL_dKmm, self.Z, None) full_values['kerngrad'] = self.kern.gradient.copy() - self.kern.update_gradients_expectations(variational_posterior=self.X, - Z=self.Z, - dL_dpsi0=full_values['dL_dpsi0'], - dL_dpsi1=full_values['dL_dpsi1'], - dL_dpsi2=full_values['dL_dpsi2']) + self.kern.update_gradients_expectations( + variational_posterior=self.X, + Z=self.Z, dL_dpsi0=full_values['dL_dpsi0'], + dL_dpsi1=full_values['dL_dpsi1'], + dL_dpsi2=full_values['dL_dpsi2'], + psi0=self.psi0, psi1=self.psi1, psi2=self.psi2) + #self.kern.update_gradients_expectations(variational_posterior=self.X, + #Z=self.Z, + #dL_dpsi0=full_values['dL_dpsi0'], + #dL_dpsi1=full_values['dL_dpsi1'], + #dL_dpsi2=full_values['dL_dpsi2']) full_values['kerngrad'] += self.kern.gradient #gradients wrt Z full_values['Zgrad'] = self.kern.gradients_X(dL_dKmm, self.Z) full_values['Zgrad'] += self.kern.gradients_Z_expectations( - full_values['dL_dpsi0'], - full_values['dL_dpsi1'], - full_values['dL_dpsi2'], - Z=self.Z, - variational_posterior=self.X) + variational_posterior=self.X, + Z=self.Z, dL_dpsi0=full_values['dL_dpsi0'], + dL_dpsi1=full_values['dL_dpsi1'], + dL_dpsi2=full_values['dL_dpsi2'], + psi0=self.psi0, psi1=self.psi1, psi2=self.psi2) else: #gradients wrt kernel self.kern.update_gradients_diag(full_values['dL_dKdiag'], self.X) From 3818aa37452bea67e670616a400a363ae2c6edfb Mon Sep 17 00:00:00 2001 From: Alan Saul Date: Mon, 31 Aug 2015 14:13:35 +0300 Subject: [PATCH 09/13] Try calculating dL_dpsi1*psi1 individually for each dimension as we go along --- GPy/kern/_src/kern.py | 12 ++++++------ GPy/kern/_src/kernel_slice_operations.py | 12 ++++++------ GPy/kern/_src/psi_comp/__init__.py | 4 ++-- GPy/kern/_src/psi_comp/rbf_psi_comp.py | 16 +++++++++------- GPy/kern/_src/rbf.py | 12 ++++++------ 
GPy/models/bayesian_gplvm_minibatch.py | 8 +++++++- GPy/models/sparse_gp_minibatch.py | 14 +++++++++++--- 7 files changed, 47 insertions(+), 31 deletions(-) diff --git a/GPy/kern/_src/kern.py b/GPy/kern/_src/kern.py index fb12030b..78488628 100644 --- a/GPy/kern/_src/kern.py +++ b/GPy/kern/_src/kern.py @@ -117,7 +117,7 @@ class Kern(Parameterized): raise NotImplementedError def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior, - psi0=None, psi1=None, psi2=None): + psi0=None, psi1=None, psi2=None, Lpsi0=None, Lpsi1=None, Lpsi2=None): """ Set the gradients of all parameters when doing inference with uncertain inputs, using expectations of the kernel. @@ -129,26 +129,26 @@ class Kern(Parameterized): dL_dpsi2 * dpsi2_d{theta_i} """ dtheta = self.psicomp.psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior, - psi0=psi0, psi1=psi1, psi2=psi2)[0] + psi0=psi0, psi1=psi1, psi2=psi2, Lpsi0=Lpsi0, Lpsi1=Lpsi1, Lpsi2=Lpsi2)[0] self.gradient[:] = dtheta def gradients_Z_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior, - psi0=None, psi1=None, psi2=None): + psi0=None, psi1=None, psi2=None, Lpsi0=None, Lpsi1=None, Lpsi2=None): """ Returns the derivative of the objective wrt Z, using the chain rule through the expectation variables. """ return self.psicomp.psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior, - psi0=psi0, psi1=psi1, psi2=psi2)[1] + psi0=psi0, psi1=psi1, psi2=psi2, Lpsi0=Lpsi0, Lpsi1=Lpsi1, Lpsi2=Lpsi2)[1] def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior, - psi0=None, psi1=None, psi2=None): + psi0=None, psi1=None, psi2=None, Lpsi0=None, Lpsi1=None, Lpsi2=None): """ Compute the gradients wrt the parameters of the variational distruibution q(X), chain-ruling via the expectations of the kernel """ return self.psicomp.psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior, - psi0=psi0, psi1=psi1, psi2=psi2)[2:] + psi0=psi0, psi1=psi1, psi2=psi2, Lpsi0=Lpsi0, Lpsi1=Lpsi1, Lpsi2=Lpsi2)[2:] def plot(self, x=None, fignum=None, ax=None, title=None, plot_limits=None, resolution=None, **mpl_kwargs): """ diff --git a/GPy/kern/_src/kernel_slice_operations.py b/GPy/kern/_src/kernel_slice_operations.py index a2058ca8..49899512 100644 --- a/GPy/kern/_src/kernel_slice_operations.py +++ b/GPy/kern/_src/kernel_slice_operations.py @@ -117,30 +117,30 @@ def _slice_psi(f): def _slice_update_gradients_expectations(f): @wraps(f) def wrap(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior, - psi0=None, psi1=None, psi2=None): + psi0=None, psi1=None, psi2=None, Lpsi0=None, Lpsi1=None, Lpsi2=None): with _Slice_wrap(self, Z, variational_posterior) as s: ret = f(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, s.X, s.X2, - psi0=psi0, psi1=psi1, psi2=psi2) + psi0=psi0, psi1=psi1, psi2=psi2, Lpsi0=Lpsi0, Lpsi1=Lpsi1, Lpsi2=Lpsi2) return ret return wrap def _slice_gradients_Z_expectations(f): @wraps(f) def wrap(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior, - psi0=None, psi1=None, psi2=None): + psi0=None, psi1=None, psi2=None, Lpsi0=None, Lpsi1=None, Lpsi2=None): with _Slice_wrap(self, Z, variational_posterior) as s: ret = s.handle_return_array(f(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, s.X, s.X2, - psi0=psi0, psi1=psi1, psi2=psi2)) + psi0=psi0, psi1=psi1, psi2=psi2, Lpsi0=Lpsi0, Lpsi1=Lpsi1, Lpsi2=Lpsi2)) return ret return wrap def _slice_gradients_qX_expectations(f): @wraps(f) def wrap(self, dL_dpsi0, 
dL_dpsi1, dL_dpsi2, Z, variational_posterior, - psi0=None, psi1=None, psi2=None): + psi0=None, psi1=None, psi2=None, Lpsi0=None, Lpsi1=None, Lpsi2=None): with _Slice_wrap(self, variational_posterior, Z) as s: ret = list(f(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, s.X2, s.X, - psi0=psi0, psi1=psi1, psi2=psi2)) + psi0=psi0, psi1=psi1, psi2=psi2, Lpsi0=Lpsi0, Lpsi1=Lpsi1, Lpsi2=Lpsi2)) r2 = ret[:2] ret[0] = s.handle_return_array(r2[0]) ret[1] = s.handle_return_array(r2[1]) diff --git a/GPy/kern/_src/psi_comp/__init__.py b/GPy/kern/_src/psi_comp/__init__.py index 6849ed81..088b514c 100644 --- a/GPy/kern/_src/psi_comp/__init__.py +++ b/GPy/kern/_src/psi_comp/__init__.py @@ -24,10 +24,10 @@ class PSICOMP_RBF(Pickleable): @Cache_this(limit=10, ignore_args=(0,1,2,3)) def psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior, - psi0=None, psi1=None, psi2=None): + psi0=None, psi1=None, psi2=None, Lpsi0=None, Lpsi1=None, Lpsi2=None): if isinstance(variational_posterior, variational.NormalPosterior): return rbf_psi_comp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior, - psi0=psi0, psi1=psi1, psi2=psi2) + psi0=psi0, psi1=psi1, psi2=psi2, Lpsi0=Lpsi0, Lpsi1=Lpsi1, Lpsi2=Lpsi2) elif isinstance(variational_posterior, variational.SpikeAndSlabPosterior): return ssrbf_psi_comp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior) else: diff --git a/GPy/kern/_src/psi_comp/rbf_psi_comp.py b/GPy/kern/_src/psi_comp/rbf_psi_comp.py index de958a37..eaf2b04d 100644 --- a/GPy/kern/_src/psi_comp/rbf_psi_comp.py +++ b/GPy/kern/_src/psi_comp/rbf_psi_comp.py @@ -69,11 +69,11 @@ def __psi2computations(variance, lengthscale, Z, mu, S): return _psi2 def psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior, - psi0=None, psi1=None, psi2=None): + psi0=None, psi1=None, psi2=None, Lpsi0=None, Lpsi1=None, Lpsi2=None): ARD = (len(lengthscale)!=1) - dvar_psi1, dl_psi1, dZ_psi1, dmu_psi1, dS_psi1 = _psi1compDer(dL_dpsi1, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance, psi1=psi1) - dvar_psi2, dl_psi2, dZ_psi2, dmu_psi2, dS_psi2 = _psi2compDer(dL_dpsi2, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance, psi2=psi2) + dvar_psi1, dl_psi1, dZ_psi1, dmu_psi1, dS_psi1 = _psi1compDer(dL_dpsi1, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance, psi1=psi1, Lpsi1=Lpsi1) + dvar_psi2, dl_psi2, dZ_psi2, dmu_psi2, dS_psi2 = _psi2compDer(dL_dpsi2, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance, psi2=psi2, Lpsi2=Lpsi2) dL_dvar = np.sum(dL_dpsi0) + dvar_psi1 + dvar_psi2 @@ -87,7 +87,7 @@ def psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscal return dL_dvar, dL_dlengscale, dL_dZ, dL_dmu, dL_dS -def __psi1compDer(dL_dpsi1, variance, lengthscale, Z, mu, S, psi1=None): +def __psi1compDer(dL_dpsi1, variance, lengthscale, Z, mu, S, psi1=None, Lpsi1=None): """ dL_dpsi1 - NxM Z - MxQ @@ -108,7 +108,8 @@ def __psi1compDer(dL_dpsi1, variance, lengthscale, Z, mu, S, psi1=None): if psi1 is None: psi1 = _psi1computations(variance, lengthscale, Z, mu, S) - Lpsi1 = dL_dpsi1*psi1 + if Lpsi1 is None: + Lpsi1 = dL_dpsi1*psi1 Zmu = Z[None,:,:]-mu[:,None,:] # NxMxQ denom = 1./(S+lengthscale2) Zmu2_denom = np.square(Zmu)*denom[:,None,:] #NxMxQ @@ -120,7 +121,7 @@ def __psi1compDer(dL_dpsi1, 
variance, lengthscale, Z, mu, S, psi1=None): return _dL_dvar, _dL_dl, _dL_dZ, _dL_dmu, _dL_dS -def __psi2compDer(dL_dpsi2, variance, lengthscale, Z, mu, S, psi2=None): +def __psi2compDer(dL_dpsi2, variance, lengthscale, Z, mu, S, psi2=None, Lpsi2=None): """ Z - MxQ mu - NxQ @@ -143,7 +144,8 @@ def __psi2compDer(dL_dpsi2, variance, lengthscale, Z, mu, S, psi2=None): if psi2 is None: psi2 = _psi2computations(variance, lengthscale, Z, mu, S) # NxMxM - Lpsi2 = dL_dpsi2*psi2 # dL_dpsi2 is MxM, using broadcast to multiply N out + if Lpsi2 is None: + Lpsi2 = dL_dpsi2*psi2 # dL_dpsi2 is MxM, using broadcast to multiply N out Lpsi2sum = np.einsum('nmo->n',Lpsi2) #N Lpsi2Z = np.einsum('nmo,oq->nq',Lpsi2,Z) #NxQ Lpsi2Z2 = np.einsum('nmo,oq,oq->nq',Lpsi2,Z,Z) #NxQ diff --git a/GPy/kern/_src/rbf.py b/GPy/kern/_src/rbf.py index 357c48ec..3ba782b1 100644 --- a/GPy/kern/_src/rbf.py +++ b/GPy/kern/_src/rbf.py @@ -59,16 +59,16 @@ class RBF(Stationary): return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior, return_psi2_n=self.return_psi2_n)[2] def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior, - psi0=None, psi1=None, psi2=None): - dL_dvar, dL_dlengscale = self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior, psi0=psi0, psi1=psi1, psi2=psi2)[:2] + psi0=None, psi1=None, psi2=None, Lpsi0=None, Lpsi1=None, Lpsi2=None): + dL_dvar, dL_dlengscale = self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior, psi0=psi0, psi1=psi1, psi2=psi2, Lpsi0=Lpsi0, Lpsi1=Lpsi1, Lpsi2=Lpsi2)[:2] self.variance.gradient = dL_dvar self.lengthscale.gradient = dL_dlengscale def gradients_Z_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior, - psi0=None, psi1=None, psi2=None): - return self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior, psi0=psi0, psi1=psi1, psi2=psi2)[2] + psi0=None, psi1=None, psi2=None, Lpsi0=None, Lpsi1=None, Lpsi2=None): + return self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior, psi0=psi0, psi1=psi1, psi2=psi2, Lpsi0=Lpsi0, Lpsi1=Lpsi1, Lpsi2=Lpsi2)[2] def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior, - psi0=None, psi1=None, psi2=None): - return self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior, psi0=psi0, psi1=psi1, psi2=psi2)[3:] + psi0=None, psi1=None, psi2=None, Lpsi0=None, Lpsi1=None, Lpsi2=None): + return self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior, psi0=psi0, psi1=psi1, psi2=psi2, Lpsi0=Lpsi0, Lpsi1=Lpsi1, Lpsi2=Lpsi2)[3:] diff --git a/GPy/models/bayesian_gplvm_minibatch.py b/GPy/models/bayesian_gplvm_minibatch.py index 8b8c6feb..0828ed6b 100644 --- a/GPy/models/bayesian_gplvm_minibatch.py +++ b/GPy/models/bayesian_gplvm_minibatch.py @@ -126,7 +126,8 @@ class BayesianGPLVMMiniBatch(SparseGPMiniBatch): Z=self.Z, dL_dpsi0=full_values['dL_dpsi0'], dL_dpsi1=full_values['dL_dpsi1'], dL_dpsi2=full_values['dL_dpsi2'], - psi0=self.psi0, psi1=self.psi1, psi2=self.psi2) + psi0=self.psi0, psi1=self.psi1, psi2=self.psi2, + Lpsi0=full_values['Lpsi0'], Lpsi1=full_values['Lpsi1'], Lpsi2=full_values['Lpsi2']) 
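# A minimal sketch of what the new Lpsi0/Lpsi1/Lpsi2 keyword arguments carry: the
# precomputed products dL_dpsi_i * psi_i (this patch later sets, e.g.,
# grad_dict['Lpsi1'] = grad_dict['dL_dpsi1']*psi1 in _inner_parameters_changed), which can
# then be accumulated over the per-output missing-data subsets instead of being recomputed
# from the full psi statistics. Shapes and variable names below are made up for
# illustration and are not the real GPy objects or call signatures.
import numpy as np
N, M, D = 6, 4, 3
psi1 = np.random.rand(N, M)               # psi1 computed once for all N samples
Lpsi1 = np.zeros((N, M))                  # running sum of dL_dpsi1 * psi1
for d in range(D):                        # loop over output dimensions
    present = np.random.rand(N) > 0.3     # stand-in for ~np.isnan(Y[:, d])
    dL_dpsi1_d = np.random.rand(present.sum(), M)
    Lpsi1[present] += dL_dpsi1_d * psi1[present]
# Lpsi1 then plays the role of the Lpsi1 argument handed to the derivative computations.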
full_values['meangrad'] += meangrad_tmp full_values['vargrad'] += vargrad_tmp else: @@ -156,6 +157,11 @@ class BayesianGPLVMMiniBatch(SparseGPMiniBatch): full_values['vargrad'] = np.zeros((self.X.shape[0], self.X.shape[1])) full_values['dL_dpsi0'] = np.zeros(self.X.shape[0]) full_values['dL_dpsi1'] = np.zeros((self.X.shape[0], self.Z.shape[0])) + full_values['dL_dpsi2'] = np.zeros((self.Z.shape[0], self.Z.shape[0])) + + full_values['Lpsi0'] = np.zeros(self.X.shape[0]) + full_values['Lpsi1'] = np.zeros((self.X.shape[0], self.Z.shape[0])) + full_values['Lpsi2'] = np.zeros((self.X.shape[0], self.Z.shape[0], self.Z.shape[0])) return full_values def parameters_changed(self): diff --git a/GPy/models/sparse_gp_minibatch.py b/GPy/models/sparse_gp_minibatch.py index c9d13e6b..e13d8061 100644 --- a/GPy/models/sparse_gp_minibatch.py +++ b/GPy/models/sparse_gp_minibatch.py @@ -106,6 +106,10 @@ class SparseGPMiniBatch(SparseGP): posterior, log_marginal_likelihood, grad_dict = self.inference_method.inference(kern, X, Z, likelihood, Y, Y_metadata, Lm=Lm, dL_dKmm=dL_dKmm, psi0=psi0, psi1=psi1, psi2=psi2_sum_n, **kwargs) + if self.has_uncertain_inputs(): + grad_dict['Lpsi0'] = grad_dict['dL_dpsi0']*psi0 + grad_dict['Lpsi1'] = grad_dict['dL_dpsi1']*psi1 + grad_dict['Lpsi2'] = grad_dict['dL_dpsi2']*psi2 return posterior, log_marginal_likelihood, grad_dict def _inner_take_over_or_update(self, full_values=None, current_values=None, value_indices=None): @@ -172,7 +176,8 @@ class SparseGPMiniBatch(SparseGP): Z=self.Z, dL_dpsi0=full_values['dL_dpsi0'], dL_dpsi1=full_values['dL_dpsi1'], dL_dpsi2=full_values['dL_dpsi2'], - psi0=self.psi0, psi1=self.psi1, psi2=self.psi2) + psi0=self.psi0, psi1=self.psi1, psi2=self.psi2, + Lpsi0=full_values['Lpsi0'], Lpsi1=full_values['Lpsi1'], Lpsi2=full_values['Lpsi2']) #self.kern.update_gradients_expectations(variational_posterior=self.X, #Z=self.Z, #dL_dpsi0=full_values['dL_dpsi0'], @@ -187,7 +192,8 @@ class SparseGPMiniBatch(SparseGP): Z=self.Z, dL_dpsi0=full_values['dL_dpsi0'], dL_dpsi1=full_values['dL_dpsi1'], dL_dpsi2=full_values['dL_dpsi2'], - psi0=self.psi0, psi1=self.psi1, psi2=self.psi2) + psi0=self.psi0, psi1=self.psi1, psi2=self.psi2, + Lpsi0=full_values['Lpsi0'], Lpsi1=full_values['Lpsi1'], Lpsi2=full_values['Lpsi2']) else: #gradients wrt kernel self.kern.update_gradients_diag(full_values['dL_dKdiag'], self.X) @@ -267,7 +273,9 @@ class SparseGPMiniBatch(SparseGP): psi1ni = psi1[ninan] if self.has_uncertain_inputs(): psi2ni = psi2[ninan] - value_indices = dict(outputs=d, samples=ninan, dL_dpsi0=ninan, dL_dpsi1=ninan, meangrad=ninan, vargrad=ninan) + #value_indices = dict(outputs=d, samples=ninan, dL_dpsi0=ninan, dL_dpsi1=ninan, meangrad=ninan, vargrad=ninan) + value_indices = dict(outputs=d, samples=ninan, dL_dpsi0=ninan, dL_dpsi1=ninan, meangrad=ninan, vargrad=ninan, + Lpsi0=ninan, Lpsi1=ninan, Lpsi2=ninan) else: psi2ni = None value_indices = dict(outputs=d, samples=ninan, dL_dKdiag=ninan, dL_dKnm=ninan) From 50b9e4dc82d7efc42e1f43ab1ffd47c4afd231a9 Mon Sep 17 00:00:00 2001 From: Alan Saul Date: Tue, 1 Sep 2015 19:17:56 +0300 Subject: [PATCH 10/13] corrected caching for psi derivatives --- GPy/kern/_src/kern.py | 7 +------ GPy/kern/_src/psi_comp/rbf_psi_comp.py | 12 ++++++++++-- GPy/models/bayesian_gplvm_minibatch.py | 26 +++++++++----------------- GPy/models/sparse_gp_minibatch.py | 18 +++++++++--------- 4 files changed, 29 insertions(+), 34 deletions(-) diff --git a/GPy/kern/_src/kern.py b/GPy/kern/_src/kern.py index 78488628..77f4d563 100644 --- 
a/GPy/kern/_src/kern.py +++ b/GPy/kern/_src/kern.py @@ -58,12 +58,7 @@ class Kern(Parameterized): self._sliced_X = 0 self.useGPU = self._support_GPU and useGPU - self._return_psi2_n_flag = ObsAr(np.zeros(1)).astype(bool) - - #FIXME: temporary solution - from ...core.parameterization.lists_and_dicts import ObserverList - self._return_psi2_n_flag.observers = ObserverList() - self._return_psi2_n_flag._update_on = True + self._return_psi2_n_flag = ObsAr(np.zeros(1).astype(bool)) from .psi_comp import PSICOMP_GH self.psicomp = PSICOMP_GH() diff --git a/GPy/kern/_src/psi_comp/rbf_psi_comp.py b/GPy/kern/_src/psi_comp/rbf_psi_comp.py index eaf2b04d..9d85c93e 100644 --- a/GPy/kern/_src/psi_comp/rbf_psi_comp.py +++ b/GPy/kern/_src/psi_comp/rbf_psi_comp.py @@ -68,6 +68,7 @@ def __psi2computations(variance, lengthscale, Z, mu, S): _psi2 = variance*variance*np.exp(_psi2_logdenom[:,None,None]+_psi2_exp1[None,:,:]+_psi2_exp2) return _psi2 +@profile def psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior, psi0=None, psi1=None, psi2=None, Lpsi0=None, Lpsi1=None, Lpsi2=None): ARD = (len(lengthscale)!=1) @@ -121,6 +122,7 @@ def __psi1compDer(dL_dpsi1, variance, lengthscale, Z, mu, S, psi1=None, Lpsi1=No return _dL_dvar, _dL_dl, _dL_dZ, _dL_dmu, _dL_dS +@profile def __psi2compDer(dL_dpsi2, variance, lengthscale, Z, mu, S, psi2=None, Lpsi2=None): """ Z - MxQ @@ -156,8 +158,14 @@ def __psi2compDer(dL_dpsi2, variance, lengthscale, Z, mu, S, psi2=None, Lpsi2=No _dL_dvar = Lpsi2sum.sum()*2/variance _dL_dmu = (-2*denom) * (mu*Lpsi2sum[:,None]-Lpsi2Zhat) _dL_dS = (2*np.square(denom))*(np.square(mu)*Lpsi2sum[:,None]-2*mu*Lpsi2Zhat+Lpsi2Zhat2) - denom*Lpsi2sum[:,None] - _dL_dZ = -np.einsum('nmo,oq->oq',Lpsi2,Z)/lengthscale2+np.einsum('nmo,oq->mq',Lpsi2,Z)/lengthscale2+ \ - 2*np.einsum('nmo,nq,nq->mq',Lpsi2,mu,denom) - np.einsum('nmo,nq,mq->mq',Lpsi2,denom,Z) - np.einsum('nmo,oq,nq->mq',Lpsi2,Z,denom) + _dL_dZ1 = -np.einsum('nmo,oq->oq',Lpsi2,Z)/lengthscale2 + _dL_dZ2 = np.einsum('nmo,oq->mq',Lpsi2,Z)/lengthscale2 + _dL_dZ3 = 2*np.einsum('nmo,nq,nq->mq',Lpsi2,mu,denom) + _dL_dZ4 = - np.einsum('nmo,nq,mq->mq',Lpsi2,denom,Z) + _dL_dZ5 = - np.einsum('nmo,oq,nq->mq',Lpsi2,Z,denom) + _dL_dZ = _dL_dZ1 + _dL_dZ2 + _dL_dZ3 + _dL_dZ4 + _dL_dZ5 + #_dL_dZ = -np.einsum('nmo,oq->oq',Lpsi2,Z)/lengthscale2+np.einsum('nmo,oq->mq',Lpsi2,Z)/lengthscale2+ \ + #2*np.einsum('nmo,nq,nq->mq',Lpsi2,mu,denom) - np.einsum('nmo,nq,mq->mq',Lpsi2,denom,Z) - np.einsum('nmo,oq,nq->mq',Lpsi2,Z,denom) _dL_dl = 2*lengthscale* ((S/lengthscale2*denom+np.square(mu*denom))*Lpsi2sum[:,None]+(Lpsi2Z2-Lpsi2Z2p)/(2*np.square(lengthscale2))- (2*mu*denom2)*Lpsi2Zhat+denom2*Lpsi2Zhat2).sum(axis=0) diff --git a/GPy/models/bayesian_gplvm_minibatch.py b/GPy/models/bayesian_gplvm_minibatch.py index 0828ed6b..ab4cc909 100644 --- a/GPy/models/bayesian_gplvm_minibatch.py +++ b/GPy/models/bayesian_gplvm_minibatch.py @@ -9,6 +9,7 @@ from ..inference.latent_function_inference.var_dtc_parallel import VarDTC_miniba import logging from GPy.models.sparse_gp_minibatch import SparseGPMiniBatch from GPy.core.parameterization.param import Param +from GPy.core.parameterization.observable_array import ObsAr class BayesianGPLVMMiniBatch(SparseGPMiniBatch): """ @@ -134,17 +135,6 @@ class BayesianGPLVMMiniBatch(SparseGPMiniBatch): full_values['Xgrad'] = self.kern.gradients_X(full_values['dL_dKnm'], self.X, self.Z) full_values['Xgrad'] += self.kern.gradients_X_diag(full_values['dL_dKdiag'], self.X) - #kl_fctr = self.kl_factr - #if 
self.has_uncertain_inputs(): - #self._log_marginal_likelihood -= kl_fctr*self.variational_prior.KL_divergence(self.X) - - # Subsetting Variational Posterior objects, makes the gradients - # empty. We need them to be 0 though: - #self.X.mean.gradient[:] = 0 - #self.X.variance.gradient[:] = 0 - - #self.variational_prior.update_gradients_KL(self.X) - if self.has_uncertain_inputs(): self.X.mean.gradient = full_values['meangrad'] self.X.variance.gradient = full_values['vargrad'] @@ -155,13 +145,15 @@ class BayesianGPLVMMiniBatch(SparseGPMiniBatch): full_values = super(BayesianGPLVMMiniBatch, self)._outer_init_full_values() full_values['meangrad'] = np.zeros((self.X.shape[0], self.X.shape[1])) full_values['vargrad'] = np.zeros((self.X.shape[0], self.X.shape[1])) - full_values['dL_dpsi0'] = np.zeros(self.X.shape[0]) - full_values['dL_dpsi1'] = np.zeros((self.X.shape[0], self.Z.shape[0])) - full_values['dL_dpsi2'] = np.zeros((self.Z.shape[0], self.Z.shape[0])) - full_values['Lpsi0'] = np.zeros(self.X.shape[0]) - full_values['Lpsi1'] = np.zeros((self.X.shape[0], self.Z.shape[0])) - full_values['Lpsi2'] = np.zeros((self.X.shape[0], self.Z.shape[0], self.Z.shape[0])) + #FIXME Hack + full_values['dL_dpsi0'] = ObsAr(np.zeros(self.X.shape[0])) + full_values['dL_dpsi1'] = ObsAr(np.zeros((self.X.shape[0], self.Z.shape[0]))) + full_values['dL_dpsi2'] = ObsAr(np.zeros((self.Z.shape[0], self.Z.shape[0]))) + + full_values['Lpsi0'] = ObsAr(np.zeros(self.X.shape[0])) + full_values['Lpsi1'] = ObsAr(np.zeros((self.X.shape[0], self.Z.shape[0]))) + full_values['Lpsi2'] = ObsAr(np.zeros((self.X.shape[0], self.Z.shape[0], self.Z.shape[0]))) return full_values def parameters_changed(self): diff --git a/GPy/models/sparse_gp_minibatch.py b/GPy/models/sparse_gp_minibatch.py index e13d8061..1eebeeca 100644 --- a/GPy/models/sparse_gp_minibatch.py +++ b/GPy/models/sparse_gp_minibatch.py @@ -147,7 +147,11 @@ class SparseGPMiniBatch(SparseGP): if np.isscalar(current_values[key]): full_values[key] += current_values[key] else: - full_values[key][index] += current_values[key] + from ..core.parameterization.observable_array import ObsAr + if isinstance(full_values[key], ObsAr): + full_values[key].values[index] += current_values[key] + else: + full_values[key][index] += current_values[key] else: full_values[key] = current_values[key] @@ -178,11 +182,6 @@ class SparseGPMiniBatch(SparseGP): dL_dpsi2=full_values['dL_dpsi2'], psi0=self.psi0, psi1=self.psi1, psi2=self.psi2, Lpsi0=full_values['Lpsi0'], Lpsi1=full_values['Lpsi1'], Lpsi2=full_values['Lpsi2']) - #self.kern.update_gradients_expectations(variational_posterior=self.X, - #Z=self.Z, - #dL_dpsi0=full_values['dL_dpsi0'], - #dL_dpsi1=full_values['dL_dpsi1'], - #dL_dpsi2=full_values['dL_dpsi2']) full_values['kerngrad'] += self.kern.gradient #gradients wrt Z @@ -251,9 +250,10 @@ class SparseGPMiniBatch(SparseGP): #Compute the psi statistics for N once, but don't sum out N in psi2 if self.has_uncertain_inputs(): self.kern.return_psi2_n = True - psi0 = self.kern.psi0(self.Z, self.X) - psi1 = self.kern.psi1(self.Z, self.X) - psi2 = self.kern.psi2(self.Z, self.X) + from ..core.parameterization.observable_array import ObsAr + psi0 = ObsAr(self.kern.psi0(self.Z, self.X)) + psi1 = ObsAr(self.kern.psi1(self.Z, self.X)) + psi2 = ObsAr(self.kern.psi2(self.Z, self.X)) else: psi0 = self.kern.Kdiag(self.X) psi1 = self.kern.K(self.X, self.Z) From 81ef734908411ddaec45467b133b5b7a5d44eda2 Mon Sep 17 00:00:00 2001 From: Alan Saul Date: Wed, 2 Sep 2015 10:46:30 +0300 Subject: [PATCH 11/13] 
Reindented, did some profiling which looks promising --- GPy/kern/_src/psi_comp/rbf_psi_comp.py | 2 -- GPy/models/bayesian_gplvm_minibatch.py | 19 +++++++++---------- 2 files changed, 9 insertions(+), 12 deletions(-) diff --git a/GPy/kern/_src/psi_comp/rbf_psi_comp.py b/GPy/kern/_src/psi_comp/rbf_psi_comp.py index 9d85c93e..3fa2de94 100644 --- a/GPy/kern/_src/psi_comp/rbf_psi_comp.py +++ b/GPy/kern/_src/psi_comp/rbf_psi_comp.py @@ -68,7 +68,6 @@ def __psi2computations(variance, lengthscale, Z, mu, S): _psi2 = variance*variance*np.exp(_psi2_logdenom[:,None,None]+_psi2_exp1[None,:,:]+_psi2_exp2) return _psi2 -@profile def psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior, psi0=None, psi1=None, psi2=None, Lpsi0=None, Lpsi1=None, Lpsi2=None): ARD = (len(lengthscale)!=1) @@ -122,7 +121,6 @@ def __psi1compDer(dL_dpsi1, variance, lengthscale, Z, mu, S, psi1=None, Lpsi1=No return _dL_dvar, _dL_dl, _dL_dZ, _dL_dmu, _dL_dS -@profile def __psi2compDer(dL_dpsi2, variance, lengthscale, Z, mu, S, psi2=None, Lpsi2=None): """ Z - MxQ diff --git a/GPy/models/bayesian_gplvm_minibatch.py b/GPy/models/bayesian_gplvm_minibatch.py index ab4cc909..5fd9af60 100644 --- a/GPy/models/bayesian_gplvm_minibatch.py +++ b/GPy/models/bayesian_gplvm_minibatch.py @@ -122,15 +122,15 @@ class BayesianGPLVMMiniBatch(SparseGPMiniBatch): """ super(BayesianGPLVMMiniBatch, self)._outer_values_update(full_values) if self.has_uncertain_inputs(): - meangrad_tmp, vargrad_tmp = self.kern.gradients_qX_expectations( - variational_posterior=self.X, - Z=self.Z, dL_dpsi0=full_values['dL_dpsi0'], - dL_dpsi1=full_values['dL_dpsi1'], - dL_dpsi2=full_values['dL_dpsi2'], - psi0=self.psi0, psi1=self.psi1, psi2=self.psi2, - Lpsi0=full_values['Lpsi0'], Lpsi1=full_values['Lpsi1'], Lpsi2=full_values['Lpsi2']) - full_values['meangrad'] += meangrad_tmp - full_values['vargrad'] += vargrad_tmp + meangrad_tmp, vargrad_tmp = self.kern.gradients_qX_expectations( + variational_posterior=self.X, + Z=self.Z, dL_dpsi0=full_values['dL_dpsi0'], + dL_dpsi1=full_values['dL_dpsi1'], + dL_dpsi2=full_values['dL_dpsi2'], + psi0=self.psi0, psi1=self.psi1, psi2=self.psi2, + Lpsi0=full_values['Lpsi0'], Lpsi1=full_values['Lpsi1'], Lpsi2=full_values['Lpsi2']) + full_values['meangrad'] += meangrad_tmp + full_values['vargrad'] += vargrad_tmp else: full_values['Xgrad'] = self.kern.gradients_X(full_values['dL_dKnm'], self.X, self.Z) full_values['Xgrad'] += self.kern.gradients_X_diag(full_values['dL_dKdiag'], self.X) @@ -146,7 +146,6 @@ class BayesianGPLVMMiniBatch(SparseGPMiniBatch): full_values['meangrad'] = np.zeros((self.X.shape[0], self.X.shape[1])) full_values['vargrad'] = np.zeros((self.X.shape[0], self.X.shape[1])) - #FIXME Hack full_values['dL_dpsi0'] = ObsAr(np.zeros(self.X.shape[0])) full_values['dL_dpsi1'] = ObsAr(np.zeros((self.X.shape[0], self.Z.shape[0]))) full_values['dL_dpsi2'] = ObsAr(np.zeros((self.Z.shape[0], self.Z.shape[0]))) From b73932c35034bbc5a37cc374edd070f18a0d42ca Mon Sep 17 00:00:00 2001 From: Alan Saul Date: Thu, 3 Sep 2015 13:32:59 +0300 Subject: [PATCH 12/13] Small edits for linear kernel --- GPy/kern/_src/psi_comp/linear_psi_comp.py | 2 +- GPy/models/sparse_gp_minibatch.py | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/GPy/kern/_src/psi_comp/linear_psi_comp.py b/GPy/kern/_src/psi_comp/linear_psi_comp.py index cd5d8fa0..a24b263a 100644 --- a/GPy/kern/_src/psi_comp/linear_psi_comp.py +++ b/GPy/kern/_src/psi_comp/linear_psi_comp.py @@ -22,7 +22,7 @@ def 
psicomputations(variance, Z, variational_posterior, return_psi2_n=False): psi0 = (variance*(np.square(mu)+S)).sum(axis=1) psi1 = np.dot(mu,(variance*Z).T) - if sum_N_psi2: + if return_psi2_n: psi2 = np.dot(S.sum(axis=0)*np.square(variance)*Z,Z.T)+ tdot(psi1.T) else: raise NotImplementedError diff --git a/GPy/models/sparse_gp_minibatch.py b/GPy/models/sparse_gp_minibatch.py index 1eebeeca..92b2baf5 100644 --- a/GPy/models/sparse_gp_minibatch.py +++ b/GPy/models/sparse_gp_minibatch.py @@ -303,6 +303,8 @@ class SparseGPMiniBatch(SparseGP): self.posterior = Posterior(woodbury_inv=woodbury_inv, woodbury_vector=woodbury_vector, K=posterior._K, mean=None, cov=None, K_chol=posterior.K_chol) self._outer_values_update(self.full_values) + if self.has_uncertain_inputs(): + self.kern.return_psi2_n = False def _outer_loop_without_missing_data(self): self._log_marginal_likelihood = 0 From cb024f0bf834f707b3afa26c5fcdaa0a1f88144b Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Fri, 4 Sep 2015 10:33:32 +0100 Subject: [PATCH 13/13] [spgp minibatch] added new routine for psi NxMxM, much faster, little bigger mem footbprint --- GPy/inference/optimization/stochastics.py | 37 ++++---- GPy/kern/_src/kern.py | 18 +--- GPy/kern/_src/rbf.py | 5 +- GPy/models/bayesian_gplvm_minibatch.py | 78 ++++++---------- GPy/models/sparse_gp_minibatch.py | 109 ++++++++++------------ 5 files changed, 101 insertions(+), 146 deletions(-) diff --git a/GPy/inference/optimization/stochastics.py b/GPy/inference/optimization/stochastics.py index f7ad1245..0fc488a2 100644 --- a/GPy/inference/optimization/stochastics.py +++ b/GPy/inference/optimization/stochastics.py @@ -43,7 +43,7 @@ class SparseGPMissing(StochasticStorage): np.set_printoptions(threshold='nan') for d in range(self.Y.shape[1]): inan = np.isnan(self.Y)[:, d] - arr_str = np.array2string(~inan, np.inf, 0, True, '', formatter={'bool':lambda x: '1' if x else '0'}) + arr_str = np.array2string(inan, np.inf, 0, True, '', formatter={'bool':lambda x: '1' if x else '0'}) try: bdict[arr_str][0].append(d) except: @@ -56,35 +56,36 @@ class SparseGPStochastics(StochasticStorage): For the sparse gp we need to store the dimension we are in, and the indices corresponding to those """ - def __init__(self, model, batchsize=1): + def __init__(self, model, batchsize=1, missing_data=True): self.batchsize = batchsize self.output_dim = model.Y.shape[1] self.Y = model.Y_normalized + self.missing_data = missing_data self.reset() self.do_stochastics() def do_stochastics(self): + import numpy as np if self.batchsize == 1: self.current_dim = (self.current_dim+1)%self.output_dim - self.d = [[[self.current_dim], np.isnan(self.Y[:, self.d])]] + self.d = [[[self.current_dim], np.isnan(self.Y[:, self.current_dim]) if self.missing_data else None]] else: - import numpy as np self.d = np.random.choice(self.output_dim, size=self.batchsize, replace=False) bdict = {} - opt = np.get_printoptions() - np.set_printoptions(threshold='nan') - for d in self.d: - inan = np.isnan(self.Y[:, d]) - arr_str = int(np.array2string(inan, - np.inf, 0, - True, '', - formatter={'bool':lambda x: '1' if x else '0'}), 2) - try: - bdict[arr_str][0].append(d) - except: - bdict[arr_str] = [[d], ~inan] - np.set_printoptions(**opt) - self.d = bdict.values() + if self.missing_data: + opt = np.get_printoptions() + np.set_printoptions(threshold='nan') + for d in self.d: + inan = np.isnan(self.Y[:, d]) + arr_str = np.array2string(inan,np.inf, 0,True, '',formatter={'bool':lambda x: '1' if x else '0'}) + try: + bdict[arr_str][0].append(d) + 
except: + bdict[arr_str] = [[d], ~inan] + np.set_printoptions(**opt) + self.d = bdict.values() + else: + self.d = [[self.d, None]] def reset(self): self.current_dim = -1 diff --git a/GPy/kern/_src/kern.py b/GPy/kern/_src/kern.py index 77f4d563..f6971faf 100644 --- a/GPy/kern/_src/kern.py +++ b/GPy/kern/_src/kern.py @@ -58,24 +58,10 @@ class Kern(Parameterized): self._sliced_X = 0 self.useGPU = self._support_GPU and useGPU - self._return_psi2_n_flag = ObsAr(np.zeros(1).astype(bool)) from .psi_comp import PSICOMP_GH self.psicomp = PSICOMP_GH() - @property - def return_psi2_n(self): - """ - Flag whether to pass back psi2 as NxMxM or MxM, by summing out N. - """ - return self._return_psi2_n_flag[0] - @return_psi2_n.setter - def return_psi2_n(self, val): - def visit(self): - if isinstance(self, Kern): - self._return_psi2_n_flag[0]=val - self.traverse(visit) - @Cache_this(limit=20) def _slice_X(self, X): return X[:, self.active_dims] @@ -97,7 +83,9 @@ class Kern(Parameterized): def psi1(self, Z, variational_posterior): return self.psicomp.psicomputations(self, Z, variational_posterior)[1] def psi2(self, Z, variational_posterior): - return self.psicomp.psicomputations(self, Z, variational_posterior, return_psi2_n=self.return_psi2_n)[2] + return self.psicomp.psicomputations(self, Z, variational_posterior, return_psi2_n=False)[2] + def psi2n(self, Z, variational_posterior): + return self.psicomp.psicomputations(self, Z, variational_posterior, return_psi2_n=True)[2] def gradients_X(self, dL_dK, X, X2): raise NotImplementedError def gradients_X_diag(self, dL_dKdiag, X): diff --git a/GPy/kern/_src/rbf.py b/GPy/kern/_src/rbf.py index 3ba782b1..145b9bfd 100644 --- a/GPy/kern/_src/rbf.py +++ b/GPy/kern/_src/rbf.py @@ -56,7 +56,10 @@ class RBF(Stationary): return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior)[1] def psi2(self, Z, variational_posterior): - return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior, return_psi2_n=self.return_psi2_n)[2] + return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior, return_psi2_n=False)[2] + + def psi2n(self, Z, variational_posterior): + return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior, return_psi2_n=True)[2] def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior, psi0=None, psi1=None, psi2=None, Lpsi0=None, Lpsi1=None, Lpsi2=None): diff --git a/GPy/models/bayesian_gplvm_minibatch.py b/GPy/models/bayesian_gplvm_minibatch.py index 5fd9af60..1d06f07f 100644 --- a/GPy/models/bayesian_gplvm_minibatch.py +++ b/GPy/models/bayesian_gplvm_minibatch.py @@ -81,38 +81,9 @@ class BayesianGPLVMMiniBatch(SparseGPMiniBatch): """Get the gradients of the posterior distribution of X in its specific form.""" return X.mean.gradient, X.variance.gradient - def _inner_parameters_changed(self, kern, X, Z, likelihood, Y, Y_metadata, Lm=None, dL_dKmm=None,psi0=None, psi1=None, psi2=None, **kw): + def _inner_parameters_changed(self, kern, X, Z, likelihood, Y, Y_metadata, Lm=None, dL_dKmm=None, psi0=None, psi1=None, psi2=None, **kw): posterior, log_marginal_likelihood, grad_dict = super(BayesianGPLVMMiniBatch, self)._inner_parameters_changed(kern, X, Z, likelihood, Y, Y_metadata, Lm=Lm, dL_dKmm=dL_dKmm, psi0=psi0, psi1=psi1, psi2=psi2, **kw) - kl_fctr = self.kl_factr - if self.has_uncertain_inputs(): - if self.missing_data: - d = self.output_dim - log_marginal_likelihood -= 
kl_fctr*self.variational_prior.KL_divergence(X)/d - else: - log_marginal_likelihood -= kl_fctr*self.variational_prior.KL_divergence(X) - - X.mean.gradient[:] = 0 - X.variance.gradient[:] = 0 - - self.variational_prior.update_gradients_KL(X) - if self.missing_data: - grad_dict['meangrad'] = kl_fctr*X.mean.gradient/d - grad_dict['vargrad'] = kl_fctr*X.variance.gradient/d - else: - grad_dict['meangrad'] = kl_fctr*X.mean.gradient - grad_dict['vargrad'] = kl_fctr*X.variance.gradient - - #if subset_indices is not None: - #value_indices['meangrad'] = subset_indices['samples'] - #value_indices['vargrad'] = subset_indices['samples'] - - #grad_dict['meangrad'] = kl_fctr*self.X.mean.gradient - #grad_dict['vargrad'] = kl_fctr*self.X.variance.gradient - #self.X.mean.gradient[:] = 0 - #self.X.variance.gradient[:] = 0 - #import ipdb; ipdb.set_trace() # XXX BREAKPOINT - return posterior, log_marginal_likelihood, grad_dict def _outer_values_update(self, full_values): @@ -127,36 +98,41 @@ class BayesianGPLVMMiniBatch(SparseGPMiniBatch): Z=self.Z, dL_dpsi0=full_values['dL_dpsi0'], dL_dpsi1=full_values['dL_dpsi1'], dL_dpsi2=full_values['dL_dpsi2'], - psi0=self.psi0, psi1=self.psi1, psi2=self.psi2, - Lpsi0=full_values['Lpsi0'], Lpsi1=full_values['Lpsi1'], Lpsi2=full_values['Lpsi2']) - full_values['meangrad'] += meangrad_tmp - full_values['vargrad'] += vargrad_tmp - else: - full_values['Xgrad'] = self.kern.gradients_X(full_values['dL_dKnm'], self.X, self.Z) - full_values['Xgrad'] += self.kern.gradients_X_diag(full_values['dL_dKdiag'], self.X) + psi0=self.psi0, psi1=self.psi1, psi2=self.psi2) + + kl_fctr = self.kl_factr + + self.X.mean.gradient[:] = 0 + self.X.variance.gradient[:] = 0 + self.variational_prior.update_gradients_KL(self.X) + + if self.missing_data or not self.stochastics: + self.X.mean.gradient = kl_fctr*self.X.mean.gradient + self.X.variance.gradient = kl_fctr*self.X.variance.gradient + else: + d = self.output_dim + self.X.mean.gradient = kl_fctr*self.X.mean.gradient*self.stochastics.batchsize/d + self.X.variance.gradient = kl_fctr*self.X.variance.gradient*self.stochastics.batchsize/d + self.X.mean.gradient += meangrad_tmp + self.X.variance.gradient += vargrad_tmp - if self.has_uncertain_inputs(): - self.X.mean.gradient = full_values['meangrad'] - self.X.variance.gradient = full_values['vargrad'] else: - self.X.gradient = full_values['Xgrad'] + self.X.gradient = self.kern.gradients_X(full_values['dL_dKnm'], self.X, self.Z) + self.X.gradient += self.kern.gradients_X_diag(full_values['dL_dKdiag'], self.X) def _outer_init_full_values(self): full_values = super(BayesianGPLVMMiniBatch, self)._outer_init_full_values() - full_values['meangrad'] = np.zeros((self.X.shape[0], self.X.shape[1])) - full_values['vargrad'] = np.zeros((self.X.shape[0], self.X.shape[1])) - - full_values['dL_dpsi0'] = ObsAr(np.zeros(self.X.shape[0])) - full_values['dL_dpsi1'] = ObsAr(np.zeros((self.X.shape[0], self.Z.shape[0]))) - full_values['dL_dpsi2'] = ObsAr(np.zeros((self.Z.shape[0], self.Z.shape[0]))) - - full_values['Lpsi0'] = ObsAr(np.zeros(self.X.shape[0])) - full_values['Lpsi1'] = ObsAr(np.zeros((self.X.shape[0], self.Z.shape[0]))) - full_values['Lpsi2'] = ObsAr(np.zeros((self.X.shape[0], self.Z.shape[0], self.Z.shape[0]))) return full_values def parameters_changed(self): super(BayesianGPLVMMiniBatch,self).parameters_changed() + kl_fctr = self.kl_factr + if self.missing_data or not self.stochastics: + self._log_marginal_likelihood -= kl_fctr*self.variational_prior.KL_divergence(self.X) + elif self.stochastics: + d = 
self.output_dim + self._log_marginal_likelihood -= kl_fctr*self.variational_prior.KL_divergence(self.X)*self.stochastics.batchsize/d + if isinstance(self.inference_method, VarDTC_minibatch): return diff --git a/GPy/models/sparse_gp_minibatch.py b/GPy/models/sparse_gp_minibatch.py index 92b2baf5..ecb53f0f 100644 --- a/GPy/models/sparse_gp_minibatch.py +++ b/GPy/models/sparse_gp_minibatch.py @@ -63,10 +63,10 @@ class SparseGPMiniBatch(SparseGP): if stochastic and missing_data: self.missing_data = True - self.stochastics = SparseGPStochastics(self, batchsize) + self.stochastics = SparseGPStochastics(self, batchsize, self.missing_data) elif stochastic and not missing_data: self.missing_data = False - self.stochastics = SparseGPStochastics(self, batchsize) + self.stochastics = SparseGPStochastics(self, batchsize, self.missing_data) elif missing_data: self.missing_data = True self.stochastics = SparseGPMissing(self) @@ -105,11 +105,6 @@ class SparseGPMiniBatch(SparseGP): psi2_sum_n = psi2.sum(axis=0) posterior, log_marginal_likelihood, grad_dict = self.inference_method.inference(kern, X, Z, likelihood, Y, Y_metadata, Lm=Lm, dL_dKmm=dL_dKmm, psi0=psi0, psi1=psi1, psi2=psi2_sum_n, **kwargs) - - if self.has_uncertain_inputs(): - grad_dict['Lpsi0'] = grad_dict['dL_dpsi0']*psi0 - grad_dict['Lpsi1'] = grad_dict['dL_dpsi1']*psi1 - grad_dict['Lpsi2'] = grad_dict['dL_dpsi2']*psi2 return posterior, log_marginal_likelihood, grad_dict def _inner_take_over_or_update(self, full_values=None, current_values=None, value_indices=None): @@ -144,14 +139,10 @@ class SparseGPMiniBatch(SparseGP): else: index = slice(None) if key in full_values: - if np.isscalar(current_values[key]): + try: + full_values[key][index] += current_values[key] + except: full_values[key] += current_values[key] - else: - from ..core.parameterization.observable_array import ObsAr - if isinstance(full_values[key], ObsAr): - full_values[key].values[index] += current_values[key] - else: - full_values[key][index] += current_values[key] else: full_values[key] = current_values[key] @@ -174,43 +165,39 @@ class SparseGPMiniBatch(SparseGP): #gradients wrt kernel dL_dKmm = full_values['dL_dKmm'] self.kern.update_gradients_full(dL_dKmm, self.Z, None) - full_values['kerngrad'] = self.kern.gradient.copy() + kgrad = self.kern.gradient.copy() self.kern.update_gradients_expectations( variational_posterior=self.X, Z=self.Z, dL_dpsi0=full_values['dL_dpsi0'], dL_dpsi1=full_values['dL_dpsi1'], dL_dpsi2=full_values['dL_dpsi2'], - psi0=self.psi0, psi1=self.psi1, psi2=self.psi2, - Lpsi0=full_values['Lpsi0'], Lpsi1=full_values['Lpsi1'], Lpsi2=full_values['Lpsi2']) - full_values['kerngrad'] += self.kern.gradient + psi0=self.psi0, psi1=self.psi1, psi2=self.psi2) + self.kern.gradient += kgrad + #gradients wrt Z - full_values['Zgrad'] = self.kern.gradients_X(dL_dKmm, self.Z) - full_values['Zgrad'] += self.kern.gradients_Z_expectations( + self.Z.gradient = self.kern.gradients_X(dL_dKmm, self.Z) + self.Z.gradient += self.kern.gradients_Z_expectations( variational_posterior=self.X, Z=self.Z, dL_dpsi0=full_values['dL_dpsi0'], dL_dpsi1=full_values['dL_dpsi1'], dL_dpsi2=full_values['dL_dpsi2'], - psi0=self.psi0, psi1=self.psi1, psi2=self.psi2, - Lpsi0=full_values['Lpsi0'], Lpsi1=full_values['Lpsi1'], Lpsi2=full_values['Lpsi2']) + psi0=self.psi0, psi1=self.psi1, psi2=self.psi2) else: #gradients wrt kernel self.kern.update_gradients_diag(full_values['dL_dKdiag'], self.X) - full_values['kerngrad'] = self.kern.gradient.copy() + kgrad = self.kern.gradient.copy() 
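# The kgrad stash above is needed because the kernel's update_gradients_* methods set
# self.kern.gradient rather than adding to it, so the contributions from successive calls
# are summed by hand and written back with self.kern.gradient += kgrad after the last call.
# A toy sketch of the pattern, using a hypothetical stand-in class rather than the real
# GPy kernel API:
import numpy as np

class _ToyKern(object):
    def __init__(self):
        self.gradient = np.zeros(2)
    def update_from_Knm(self):             # stand-in for update_gradients_full(dL_dKnm, X, Z)
        self.gradient = np.array([0.0, 2.0])
    def update_from_Kmm(self):             # stand-in for update_gradients_full(dL_dKmm, Z, None)
        self.gradient = np.array([1.0, 0.0])

kern = _ToyKern()
kern.update_from_Knm()
kgrad = kern.gradient.copy()               # stash the first contribution
kern.update_from_Kmm()                     # this call overwrites kern.gradient
kern.gradient += kgrad                     # total gradient is the sum of both contributions
assert np.allclose(kern.gradient, [1.0, 2.0])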
self.kern.update_gradients_full(full_values['dL_dKnm'], self.X, self.Z) - full_values['kerngrad'] += self.kern.gradient + kgrad += self.kern.gradient self.kern.update_gradients_full(full_values['dL_dKmm'], self.Z, None) - full_values['kerngrad'] += self.kern.gradient + self.kern.gradient += kgrad + #kgrad += self.kern.gradient + #gradients wrt Z - full_values['Zgrad'] = self.kern.gradients_X(full_values['dL_dKmm'], self.Z) - full_values['Zgrad'] += self.kern.gradients_X(full_values['dL_dKnm'].T, self.Z, self.X) + self.Z.gradient = self.kern.gradients_X(full_values['dL_dKmm'], self.Z) + self.Z.gradient += self.kern.gradients_X(full_values['dL_dKnm'].T, self.Z, self.X) self.likelihood.update_gradients(full_values['dL_dthetaL']) - full_values['likgrad'] = self.likelihood.gradient.copy() - - self.likelihood.gradient = full_values['likgrad'] - self.kern.gradient = full_values['kerngrad'] - self.Z.gradient = full_values['Zgrad'] def _outer_init_full_values(self): """ @@ -225,8 +212,15 @@ class SparseGPMiniBatch(SparseGP): to initialize the gradients for the mean and the variance in order to have the full gradient for indexing) """ - return {'dL_dKdiag': np.zeros(self.X.shape[0]), - 'dL_dKnm': np.zeros((self.X.shape[0], self.Z.shape[0]))} + retd = dict(dL_dKmm=np.zeros((self.Z.shape[0], self.Z.shape[0]))) + if self.has_uncertain_inputs(): + retd.update(dict(dL_dpsi0=np.zeros(self.X.shape[0]), + dL_dpsi1=np.zeros((self.X.shape[0], self.Z.shape[0])), + dL_dpsi2=np.zeros((self.X.shape[0], self.Z.shape[0], self.Z.shape[0])))) + else: + retd.update({'dL_dKdiag': np.zeros(self.X.shape[0]), + 'dL_dKnm': np.zeros((self.X.shape[0], self.Z.shape[0]))}) + return retd def _outer_loop_for_missing_data(self): Lm = None @@ -247,35 +241,17 @@ class SparseGPMiniBatch(SparseGP): message = m_f(-1) print(message, end=' ') - #Compute the psi statistics for N once, but don't sum out N in psi2 - if self.has_uncertain_inputs(): - self.kern.return_psi2_n = True - from ..core.parameterization.observable_array import ObsAr - psi0 = ObsAr(self.kern.psi0(self.Z, self.X)) - psi1 = ObsAr(self.kern.psi1(self.Z, self.X)) - psi2 = ObsAr(self.kern.psi2(self.Z, self.X)) - else: - psi0 = self.kern.Kdiag(self.X) - psi1 = self.kern.K(self.X, self.Z) - psi2 = None - - self.psi0 = psi0 - self.psi1 = psi1 - self.psi2 = psi2 - for d, ninan in self.stochastics.d: if not self.stochastics: print(' '*(len(message)) + '\r', end=' ') message = m_f(d) print(message, end=' ') - psi0ni = psi0[ninan] - psi1ni = psi1[ninan] + psi0ni = self.psi0[ninan] + psi1ni = self.psi1[ninan] if self.has_uncertain_inputs(): - psi2ni = psi2[ninan] - #value_indices = dict(outputs=d, samples=ninan, dL_dpsi0=ninan, dL_dpsi1=ninan, meangrad=ninan, vargrad=ninan) - value_indices = dict(outputs=d, samples=ninan, dL_dpsi0=ninan, dL_dpsi1=ninan, meangrad=ninan, vargrad=ninan, - Lpsi0=ninan, Lpsi1=ninan, Lpsi2=ninan) + psi2ni = self.psi2[ninan] + value_indices = dict(outputs=d, samples=ninan, dL_dpsi0=ninan, dL_dpsi1=ninan, dL_dpsi2=ninan) else: psi2ni = None value_indices = dict(outputs=d, samples=ninan, dL_dKdiag=ninan, dL_dKnm=ninan) @@ -290,7 +266,7 @@ class SparseGPMiniBatch(SparseGP): # Fill out the full values by adding in the apporpriate grad_dict # values self._inner_take_over_or_update(self.full_values, grad_dict, value_indices) - self._inner_values_update(grad_dict) # What is this for? + self._inner_values_update(grad_dict) # What is this for? 
-> MRD woodbury_inv[:, :, d] = posterior.woodbury_inv[:,:,None] woodbury_vector[:, d] = posterior.woodbury_vector @@ -307,8 +283,6 @@ class SparseGPMiniBatch(SparseGP): self.kern.return_psi2_n = False def _outer_loop_without_missing_data(self): - self._log_marginal_likelihood = 0 - if self.posterior is None: woodbury_inv = np.zeros((self.num_inducing, self.num_inducing, self.output_dim)) woodbury_vector = np.zeros((self.num_inducing, self.output_dim)) @@ -316,14 +290,14 @@ class SparseGPMiniBatch(SparseGP): woodbury_inv = self.posterior._woodbury_inv woodbury_vector = self.posterior._woodbury_vector - d = self.stochastics.d + d = self.stochastics.d[0][0] posterior, log_marginal_likelihood, grad_dict= self._inner_parameters_changed( self.kern, self.X, self.Z, self.likelihood, self.Y_normalized[:, d], self.Y_metadata) self.grad_dict = grad_dict - self._log_marginal_likelihood += log_marginal_likelihood + self._log_marginal_likelihood = log_marginal_likelihood self._outer_values_update(self.grad_dict) @@ -334,6 +308,19 @@ class SparseGPMiniBatch(SparseGP): K=posterior._K, mean=None, cov=None, K_chol=posterior.K_chol) def parameters_changed(self): + #Compute the psi statistics for N once, but don't sum out N in psi2 + if self.has_uncertain_inputs(): + #psi0 = ObsAr(self.kern.psi0(self.Z, self.X)) + #psi1 = ObsAr(self.kern.psi1(self.Z, self.X)) + #psi2 = ObsAr(self.kern.psi2(self.Z, self.X)) + self.psi0 = self.kern.psi0(self.Z, self.X) + self.psi1 = self.kern.psi1(self.Z, self.X) + self.psi2 = self.kern.psi2n(self.Z, self.X) + else: + self.psi0 = self.kern.Kdiag(self.X) + self.psi1 = self.kern.K(self.X, self.Z) + self.psi2 = None + if self.missing_data: self._outer_loop_for_missing_data() elif self.stochastics: