diff --git a/GPy/core/sparse_gp.py b/GPy/core/sparse_gp.py index df3160f1..74a16894 100644 --- a/GPy/core/sparse_gp.py +++ b/GPy/core/sparse_gp.py @@ -54,11 +54,11 @@ class SparseGP(GPBase): self.Kmm = self.kern.K(self.Z) if self.has_uncertain_inputs: self.psi0 = self.kern.psi0(self.Z, self.X, self.X_variance) - self.psi1 = self.kern.psi1(self.Z, self.X, self.X_variance).T + self.psi1 = self.kern.psi1(self.Z, self.X, self.X_variance) self.psi2 = self.kern.psi2(self.Z, self.X, self.X_variance) else: self.psi0 = self.kern.Kdiag(self.X) - self.psi1 = self.kern.K(self.Z, self.X) + self.psi1 = self.kern.K(self.X, self.Z) self.psi2 = None def _computations(self): @@ -75,12 +75,13 @@ class SparseGP(GPBase): evals, evecs = linalg.eigh(psi2_beta) clipped_evals = np.clip(evals, 0., 1e6) # TODO: make clipping configurable tmp = evecs * np.sqrt(clipped_evals) + tmp = tmp.T else: if self.likelihood.is_heteroscedastic: - tmp = self.psi1 * (np.sqrt(self.likelihood.precision.flatten().reshape(1, self.num_data))) + tmp = self.psi1 * (np.sqrt(self.likelihood.precision.flatten().reshape(self.num_data,1))) else: tmp = self.psi1 * (np.sqrt(self.likelihood.precision)) - tmp, _ = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(tmp), lower=1) + tmp, _ = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(tmp.T), lower=1) self.A = tdot(tmp) @@ -89,7 +90,7 @@ class SparseGP(GPBase): self.LB = jitchol(self.B) # TODO: make a switch for either first compute psi1V, or VV.T - self.psi1V = np.dot(self.psi1, self.likelihood.V) + self.psi1V = np.dot(self.psi1.T, self.likelihood.V) # back substutue C into psi1V tmp, info1 = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(self.psi1V), lower=1, trans=0) @@ -107,14 +108,14 @@ class SparseGP(GPBase): # Compute dL_dpsi # FIXME: this is untested for the heterscedastic + uncertain inputs case self.dL_dpsi0 = -0.5 * self.output_dim * (self.likelihood.precision * np.ones([self.num_data, 1])).flatten() - self.dL_dpsi1 = np.dot(self.Cpsi1V, 
self.likelihood.V.T) + self.dL_dpsi1 = np.dot(self.Cpsi1V, self.likelihood.V.T).T dL_dpsi2_beta = 0.5 * backsub_both_sides(self.Lm, self.output_dim * np.eye(self.num_inducing) - self.DBi_plus_BiPBi) if self.likelihood.is_heteroscedastic: if self.has_uncertain_inputs: self.dL_dpsi2 = self.likelihood.precision.flatten()[:, None, None] * dL_dpsi2_beta[None, :, :] else: - self.dL_dpsi1 += 2.*np.dot(dL_dpsi2_beta, self.psi1 * self.likelihood.precision.reshape(1, self.num_data)) + self.dL_dpsi1 += 2.*np.dot(dL_dpsi2_beta, (self.psi1 * self.likelihood.precision.reshape(self.num_data, 1)).T).T self.dL_dpsi2 = None else: dL_dpsi2 = self.likelihood.precision * dL_dpsi2_beta @@ -123,7 +124,7 @@ class SparseGP(GPBase): self.dL_dpsi2 = np.repeat(dL_dpsi2[None, :, :], self.num_data, axis=0) else: # subsume back into psi1 (==Kmn) - self.dL_dpsi1 += 2.*np.dot(dL_dpsi2, self.psi1) + self.dL_dpsi1 += 2.*np.dot(self.psi1, dL_dpsi2) self.dL_dpsi2 = None @@ -179,10 +180,10 @@ class SparseGP(GPBase): Kmmi = tdot(Lmi.T) diag_tr_psi2Kmmi = np.array([np.trace(psi2_Kmmi) for psi2_Kmmi in np.dot(self.psi2, Kmmi)]) - self.likelihood.fit_FITC(self.Kmm, self.psi1, diag_tr_psi2Kmmi) # This uses the fit_FITC code, but does not perfomr a FITC-EP.#TODO solve potential confusion + self.likelihood.fit_FITC(self.Kmm, self.psi1.T, diag_tr_psi2Kmmi) # This uses the fit_FITC code, but does not perform a FITC-EP.#TODO solve potential confusion # raise NotImplementedError, "EP approximation not implemented for uncertain inputs" else: - self.likelihood.fit_DTC(self.Kmm, self.psi1) + self.likelihood.fit_DTC(self.Kmm, self.psi1.T) # self.likelihood.fit_FITC(self.Kmm,self.psi1,self.psi0) self._set_params(self._get_params()) # update the GP @@ -196,10 +197,10 @@ class SparseGP(GPBase): dL_dtheta = self.kern.dK_dtheta(self.dL_dKmm, self.Z) if self.has_uncertain_inputs: dL_dtheta += self.kern.dpsi0_dtheta(self.dL_dpsi0, self.Z, self.X, self.X_variance) - dL_dtheta += self.kern.dpsi1_dtheta(self.dL_dpsi1.T, 
self.Z, self.X, self.X_variance) + dL_dtheta += self.kern.dpsi1_dtheta(self.dL_dpsi1, self.Z, self.X, self.X_variance) dL_dtheta += self.kern.dpsi2_dtheta(self.dL_dpsi2, self.Z, self.X, self.X_variance) else: - dL_dtheta += self.kern.dK_dtheta(self.dL_dpsi1, self.Z, self.X) + dL_dtheta += self.kern.dK_dtheta(self.dL_dpsi1, self.X, self.Z) dL_dtheta += self.kern.dKdiag_dtheta(self.dL_dpsi0, self.X) return dL_dtheta @@ -213,7 +214,7 @@ class SparseGP(GPBase): dL_dZ += self.kern.dpsi1_dZ(self.dL_dpsi1, self.Z, self.X, self.X_variance) dL_dZ += self.kern.dpsi2_dZ(self.dL_dpsi2, self.Z, self.X, self.X_variance) else: - dL_dZ += self.kern.dK_dX(self.dL_dpsi1, self.Z, self.X) + dL_dZ += self.kern.dK_dX(self.dL_dpsi1.T, self.Z, self.X) return dL_dZ def _raw_predict(self, Xnew, X_variance_new=None, which_parts='all', full_cov=False): @@ -233,9 +234,9 @@ class SparseGP(GPBase): Kxx = self.kern.Kdiag(Xnew, which_parts=which_parts) var = Kxx - np.sum(Kx * np.dot(Kmmi_LmiBLmi, Kx), 0) else: - # assert which_parts=='all', "swithching out parts of variational kernels is not implemented" Kx = self.kern.psi1(self.Z, Xnew, X_variance_new) # , which_parts=which_parts) TODO: which_parts - mu = np.dot(Kx, self.Cpsi1V) + mu = np.dot(Kx.T, self.Cpsi1V) if full_cov: raise NotImplementedError, "TODO" else: diff --git a/GPy/kern/kern.py b/GPy/kern/kern.py index 4a05783b..76a0d99e 100644 --- a/GPy/kern/kern.py +++ b/GPy/kern/kern.py @@ -360,9 +360,7 @@ class kern(Parameterised): raise NotImplementedError("We don't handle linear/linear cross-terms") tmp = np.zeros((mu.shape[0], Z.shape[0])) p1.psi1(Z, mu, S, tmp) - tmp2 = np.zeros_like(target) - p2.dpsi1_dZ((tmp[:, None, :] * dL_dpsi2).sum(1).T, Z, mu, S, tmp2) - target += tmp2 + p2.dpsi1_dZ((tmp[:, None, :] * dL_dpsi2).sum(1), Z, mu, S, target) return target * 2 @@ -378,7 +376,7 @@ class kern(Parameterised): tmp = np.zeros((mu.shape[0], 
Z.shape[0])) p1.psi1(Z, mu, S, tmp) - p2.dpsi1_dmuS((tmp[:, None, :] * dL_dpsi2).sum(1).T * 2., Z, mu, S, target_mu, target_S) + p2.dpsi1_dmuS((tmp[:, None, :] * dL_dpsi2).sum(1) * 2., Z, mu, S, target_mu, target_S) return target_mu, target_S diff --git a/GPy/kern/linear.py b/GPy/kern/linear.py index b2aa29f9..55582114 100644 --- a/GPy/kern/linear.py +++ b/GPy/kern/linear.py @@ -98,7 +98,7 @@ class linear(Kernpart): target += tmp.sum() def dK_dX(self, dL_dK, X, X2, target): - target += (((X2[:, None, :] * self.variances)) * dL_dK[:, :, None]).sum(0) + target += (((X2[None,:, :] * self.variances)) * dL_dK[:, :, None]).sum(1) #---------------------------------------# # PSI statistics # @@ -131,7 +131,7 @@ class linear(Kernpart): def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S, target_mu, target_S): """Do nothing for S, it does not affect psi1""" self._psi_computations(Z, mu, S) - target_mu += (dL_dpsi1.T[:, :, None] * (Z * self.variances)).sum(1) + target_mu += (dL_dpsi1[:, :, None] * (Z * self.variances)).sum(1) def dpsi1_dZ(self, dL_dpsi1, Z, mu, S, target): self.dK_dX(dL_dpsi1.T, Z, mu, target) diff --git a/GPy/kern/rbf.py b/GPy/kern/rbf.py index d1e0797c..5686d7bd 100644 --- a/GPy/kern/rbf.py +++ b/GPy/kern/rbf.py @@ -178,13 +178,13 @@ class rbf(Kernpart): self._psi_computations(Z, mu, S) denominator = (self.lengthscale2 * (self._psi1_denom)) dpsi1_dZ = -self._psi1[:, :, None] * ((self._psi1_dist / denominator)) - target += np.sum(dL_dpsi1.T[:, :, None] * dpsi1_dZ, 0) + target += np.sum(dL_dpsi1[:, :, None] * dpsi1_dZ, 0) def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S, target_mu, target_S): self._psi_computations(Z, mu, S) tmp = self._psi1[:, :, None] / self.lengthscale2 / self._psi1_denom - target_mu += np.sum(dL_dpsi1.T[:, :, None] * tmp * self._psi1_dist, 1) - target_S += np.sum(dL_dpsi1.T[:, :, None] * 0.5 * tmp * (self._psi1_dist_sq - 1), 1) + target_mu += np.sum(dL_dpsi1[:, :, None] * tmp * self._psi1_dist, 1) + target_S += np.sum(dL_dpsi1[:, :, None] * 0.5 * tmp * 
(self._psi1_dist_sq - 1), 1) def psi2(self, Z, mu, S, target): self._psi_computations(Z, mu, S) diff --git a/GPy/models/sparse_gplvm.py b/GPy/models/sparse_gplvm.py index 76fe65f1..ea2f8013 100644 --- a/GPy/models/sparse_gplvm.py +++ b/GPy/models/sparse_gplvm.py @@ -43,7 +43,7 @@ class SparseGPLVM(SparseGPRegression, GPLVM): def dL_dX(self): dL_dX = self.kern.dKdiag_dX(self.dL_dpsi0, self.X) - dL_dX += self.kern.dK_dX(self.dL_dpsi1.T, self.X, self.Z) + dL_dX += self.kern.dK_dX(self.dL_dpsi1, self.X, self.Z) return dL_dX