diff --git a/GPy/kern/src/add.py b/GPy/kern/src/add.py index 9f80ac9d..06bb4870 100644 --- a/GPy/kern/src/add.py +++ b/GPy/kern/src/add.py @@ -181,6 +181,8 @@ class Add(CombinationKernel): return psi2 def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): + tmp = dL_dpsi2.sum(0)+ dL_dpsi2.sum(1) if len(dL_dpsi2.shape)==2 else dL_dpsi2.sum(2)+ dL_dpsi2.sum(1) + if not self._exact_psicomp: return Kern.update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior) from .static import White, Bias for p1 in self.parts: @@ -192,12 +194,13 @@ class Add(CombinationKernel): if isinstance(p2, White): continue elif isinstance(p2, Bias): - eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2. + eff_dL_dpsi1 += tmp * p2.variance else:# np.setdiff1d(p1._all_dims_active, ar2, assume_unique): # TODO: Careful, not correct for overlapping _all_dims_active - eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z, variational_posterior) * 2. + eff_dL_dpsi1 += tmp * p2.psi1(Z, variational_posterior) p1.update_gradients_expectations(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior) def gradients_Z_expectations(self, dL_psi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): + tmp = dL_dpsi2.sum(0)+ dL_dpsi2.sum(1) if len(dL_dpsi2.shape)==2 else dL_dpsi2.sum(2)+ dL_dpsi2.sum(1) if not self._exact_psicomp: return Kern.gradients_Z_expectations(self, dL_psi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior) from .static import White, Bias target = np.zeros(Z.shape) @@ -210,13 +213,15 @@ class Add(CombinationKernel): if isinstance(p2, White): continue elif isinstance(p2, Bias): - eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2. + eff_dL_dpsi1 += tmp * p2.variance else: - eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z, variational_posterior) * 2. + eff_dL_dpsi1 += tmp * p2.psi1(Z, variational_posterior) target += p1.gradients_Z_expectations(dL_psi0, eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior) return target def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): + tmp = dL_dpsi2.sum(0)+ dL_dpsi2.sum(1) if len(dL_dpsi2.shape)==2 else dL_dpsi2.sum(2)+ dL_dpsi2.sum(1) + if not self._exact_psicomp: return Kern.gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior) from .static import White, Bias target_grads = [np.zeros(v.shape) for v in variational_posterior.parameters] @@ -229,9 +234,9 @@ class Add(CombinationKernel): if isinstance(p2, White): continue elif isinstance(p2, Bias): - eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2. + eff_dL_dpsi1 += tmp * p2.variance else: - eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z, variational_posterior) * 2. + eff_dL_dpsi1 += tmp * p2.psi1(Z, variational_posterior) grads = p1.gradients_qX_expectations(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior) [np.add(target_grads[i],grads[i],target_grads[i]) for i in range(len(grads))] return target_grads diff --git a/GPy/kern/src/psi_comp/rbf_psi_comp.py b/GPy/kern/src/psi_comp/rbf_psi_comp.py index 5667eec6..6e6c1957 100644 --- a/GPy/kern/src/psi_comp/rbf_psi_comp.py +++ b/GPy/kern/src/psi_comp/rbf_psi_comp.py @@ -106,6 +106,8 @@ def _psi2compDer(dL_dpsi2, variance, lengthscale, Z, mu, S): denom = 1./(2*S+lengthscale2) denom2 = np.square(denom) + if len(dL_dpsi2.shape)==2: dL_dpsi2 = (dL_dpsi2+dL_dpsi2.T)/2 + else: dL_dpsi2 = (dL_dpsi2+ np.swapaxes(dL_dpsi2, 1,2))/2 _psi2 = _psi2computations(variance, lengthscale, Z, mu, S) # NxMxM Lpsi2 = dL_dpsi2*_psi2 # dL_dpsi2 is MxM, using broadcast to multiply N out Lpsi2sum = Lpsi2.reshape(N,M*M).sum(1) #N diff --git a/GPy/kern/src/psi_comp/rbf_psi_gpucomp.py b/GPy/kern/src/psi_comp/rbf_psi_gpucomp.py index 50945db6..30312d68 100644 --- a/GPy/kern/src/psi_comp/rbf_psi_gpucomp.py +++ b/GPy/kern/src/psi_comp/rbf_psi_gpucomp.py @@ -370,6 +370,10 @@ class PSICOMP_RBF_GPU(PSICOMP_RBF): @Cache_this(limit=10, ignore_args=(0,2,3,4)) def _psiDerivativecomputations(self, kern, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): + # resolve the requirement of dL_dpsi2 to be symmetric + if len(dL_dpsi2.shape)==2: dL_dpsi2 = (dL_dpsi2+dL_dpsi2.T)/2 + else: dL_dpsi2 = (dL_dpsi2+ np.swapaxes(dL_dpsi2, 1,2))/2 + variance, lengthscale = kern.variance, kern.lengthscale from ....util.linalg_gpu import sum_axis ARD = (len(lengthscale)!=1) diff --git a/GPy/testing/kernel_tests.py b/GPy/testing/kernel_tests.py index e81ce2cf..e2f4324a 100644 --- a/GPy/testing/kernel_tests.py +++ b/GPy/testing/kernel_tests.py @@ -468,7 +468,7 @@ class Kernel_Psi_statistics_GradientTests(unittest.TestCase): self.w1 = np.random.randn(N) self.w2 = np.random.randn(N,M) self.w3 = np.random.randn(M,M) - self.w3 = self.w3+self.w3.T + self.w3 = self.w3#+self.w3.T self.w3n = np.random.randn(N,M,M) self.w3n = self.w3n+np.swapaxes(self.w3n, 1,2) @@ -476,7 +476,7 @@ class Kernel_Psi_statistics_GradientTests(unittest.TestCase): from GPy.kern import RBF,Linear,MLP,Bias,White Q = self.Z.shape[1] kernels = [RBF(Q,ARD=True), Linear(Q,ARD=True),MLP(Q,ARD=True), RBF(Q,ARD=True)+Linear(Q,ARD=True)+Bias(Q)+White(Q) - ,RBF(Q,ARD=True)+Bias(Q)+White(Q), Linear(Q,ARD=True)+Bias(Q)+White(Q)] + ,RBF(Q,ARD=True)+Bias(Q)+White(Q), Linear(Q,ARD=True)+Bias(Q)+White(Q)] for k in kernels: k.randomize()