diff --git a/GPy/models/bayesian_gplvm.py b/GPy/models/bayesian_gplvm.py
index 2589dfbd..9bcbfac1 100644
--- a/GPy/models/bayesian_gplvm.py
+++ b/GPy/models/bayesian_gplvm.py
@@ -105,36 +105,41 @@ class BayesianGPLVM(SparseGP):
         Notes:
         This will only work with a univariate Gaussian likelihood (for now)
         """
-        assert not self.likelihood.is_heteroscedastic
         N_test = Y.shape[0]
         input_dim = self.Z.shape[1]
+
         means = np.zeros((N_test, input_dim))
         covars = np.zeros((N_test, input_dim))
-
-        dpsi0 = -0.5 * self.input_dim * self.likelihood.precision
-        dpsi2 = self.dL_dpsi2[0][None, :, :] # TODO: this may change if we ignore het. likelihoods
-        V = self.likelihood.precision * Y
+
+        dpsi0 = -0.5 * self.input_dim / self.likelihood.variance
+        dpsi2 = self.grad_dict['dL_dpsi2'][0][None, :, :] # TODO: this may change if we ignore het. likelihoods
+        V = Y/self.likelihood.variance

         #compute CPsi1V
-        if self.Cpsi1V is None:
-            psi1V = np.dot(self.psi1.T, self.likelihood.V)
-            tmp, _ = linalg.dtrtrs(self._Lm, np.asfortranarray(psi1V), lower=1, trans=0)
-            tmp, _ = linalg.dpotrs(self.LB, tmp, lower=1)
-            self.Cpsi1V, _ = linalg.dtrtrs(self._Lm, tmp, lower=1, trans=1)
+        #if self.Cpsi1V is None:
+        #    psi1V = np.dot(self.psi1.T, self.likelihood.V)
+        #    tmp, _ = linalg.dtrtrs(self._Lm, np.asfortranarray(psi1V), lower=1, trans=0)
+        #    tmp, _ = linalg.dpotrs(self.LB, tmp, lower=1)
+        #    self.Cpsi1V, _ = linalg.dtrtrs(self._Lm, tmp, lower=1, trans=1)

-        dpsi1 = np.dot(self.Cpsi1V, V.T)
+        dpsi1 = np.dot(self.posterior.woodbury_vector, V.T)

-        start = np.zeros(self.input_dim * 2)
+        #start = np.zeros(self.input_dim * 2)
+
+
+        from scipy.optimize import minimize

         for n, dpsi1_n in enumerate(dpsi1.T[:, :, None]):
-            args = (self.kern, self.Z, dpsi0, dpsi1_n.T, dpsi2)
-            xopt, fopt, neval, status = SCG(f=latent_cost, gradf=latent_grad, x=start, optargs=args, display=False)
-
+            args = (input_dim, self.kern.copy(), self.Z, dpsi0, dpsi1_n.T, dpsi2)
+            res = minimize(latent_cost_and_grad, jac=True, x0=np.hstack((means[n], covars[n])), args=args, method='BFGS')
+            xopt = res.x
             mu, log_S = xopt.reshape(2, 1, -1)
             means[n] = mu[0].copy()
             covars[n] = np.exp(log_S[0]).copy()

-        return means, covars
+        X = NormalPosterior(means, covars)
+
+        return X

     def dmu_dX(self, Xnew):
         """
@@ -166,57 +171,26 @@ class BayesianGPLVM(SparseGP):
         return dim_reduction_plots.plot_steepest_gradient_map(self,*args,**kwargs)

-def latent_cost_and_grad(mu_S, kern, Z, dL_dpsi0, dL_dpsi1, dL_dpsi2):
+def latent_cost_and_grad(mu_S, input_dim, kern, Z, dL_dpsi0, dL_dpsi1, dL_dpsi2):
     """
     objective function for fitting the latent variables for test points (negative log-likelihood: should be minimised!)
""" - mu, log_S = mu_S.reshape(2, 1, -1) + mu = mu_S[:input_dim][None] + log_S = mu_S[input_dim:][None] S = np.exp(log_S) - psi0 = kern.psi0(Z, mu, S) - psi1 = kern.psi1(Z, mu, S) - psi2 = kern.psi2(Z, mu, S) + X = NormalPosterior(mu, S) - lik = dL_dpsi0 * psi0 + np.dot(dL_dpsi1.flatten(), psi1.flatten()) + np.dot(dL_dpsi2.flatten(), psi2.flatten()) - 0.5 * np.sum(np.square(mu) + S) + 0.5 * np.sum(log_S) + psi0 = kern.psi0(Z, X) + psi1 = kern.psi1(Z, X) + psi2 = kern.psi2(Z, X) - mu0, S0 = kern.dpsi0_dmuS(dL_dpsi0, Z, mu, S) - mu1, S1 = kern.dpsi1_dmuS(dL_dpsi1, Z, mu, S) - mu2, S2 = kern.dpsi2_dmuS(dL_dpsi2, Z, mu, S) - - dmu = mu0 + mu1 + mu2 - mu + lik = dL_dpsi0 * psi0.sum() + np.einsum('ij,kj->...', dL_dpsi1, psi1) + np.einsum('ijk,lkj->...', dL_dpsi2, psi2) - 0.5 * np.sum(np.square(mu) + S) + 0.5 * np.sum(log_S) + + dLdmu, dLdS = kern.gradients_qX_expectations(dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, X) + dmu = dLdmu - mu # dS = S0 + S1 + S2 -0.5 + .5/S - dlnS = S * (S0 + S1 + S2 - 0.5) + .5 + dlnS = S * (dLdS - 0.5) + .5 + return -lik, -np.hstack((dmu.flatten(), dlnS.flatten())) - -def latent_cost(mu_S, kern, Z, dL_dpsi0, dL_dpsi1, dL_dpsi2): - """ - objective function for fitting the latent variables (negative log-likelihood: should be minimised!) - This is the same as latent_cost_and_grad but only for the objective - """ - mu, log_S = mu_S.reshape(2, 1, -1) - S = np.exp(log_S) - - psi0 = kern.psi0(Z, mu, S) - psi1 = kern.psi1(Z, mu, S) - psi2 = kern.psi2(Z, mu, S) - - lik = dL_dpsi0 * psi0 + np.dot(dL_dpsi1.flatten(), psi1.flatten()) + np.dot(dL_dpsi2.flatten(), psi2.flatten()) - 0.5 * np.sum(np.square(mu) + S) + 0.5 * np.sum(log_S) - return -float(lik) - -def latent_grad(mu_S, kern, Z, dL_dpsi0, dL_dpsi1, dL_dpsi2): - """ - This is the same as latent_cost_and_grad but only for the grad - """ - mu, log_S = mu_S.reshape(2, 1, -1) - S = np.exp(log_S) - - mu0, S0 = kern.dpsi0_dmuS(dL_dpsi0, Z, mu, S) - mu1, S1 = kern.dpsi1_dmuS(dL_dpsi1, Z, mu, S) - mu2, S2 = kern.dpsi2_dmuS(dL_dpsi2, Z, mu, S) - - dmu = mu0 + mu1 + mu2 - mu - # dS = S0 + S1 + S2 -0.5 + .5/S - dlnS = S * (S0 + S1 + S2 - 0.5) + .5 - - return -np.hstack((dmu.flatten(), dlnS.flatten()))