diff --git a/GPy/core/gp.py b/GPy/core/gp.py
index 6b1c9cb4..303921ea 100644
--- a/GPy/core/gp.py
+++ b/GPy/core/gp.py
@@ -377,7 +377,7 @@ class GP(Model):
         if full_cov:
             dK2_dXdX = kern.gradients_XX(one, Xnew)
         else:
-            dK2_dXdX = -kern.gradients_XX(one, Xnew).sum(0)
+            dK2_dXdX = kern.gradients_XX(one, Xnew).sum(0)
         #dK2_dXdX = np.zeros((Xnew.shape[0], Xnew.shape[1], Xnew.shape[1]))
         #for i in range(Xnew.shape[0]):
         #    dK2_dXdX[i:i+1,:,:] = kern.gradients_XX(one, Xnew[i:i+1,:])
diff --git a/GPy/kern/src/add.py b/GPy/kern/src/add.py
index 5ac773c9..99fe809b 100644
--- a/GPy/kern/src/add.py
+++ b/GPy/kern/src/add.py
@@ -85,23 +85,22 @@ class Add(CombinationKernel):
         [target.__iadd__(p.gradients_X_diag(dL_dKdiag, X)) for p in self.parts]
         return target
 
-    def gradients_XX(self, dL_dK, X, X2, cov=True):
-        if cov: # full covarance
-            if X2 is None:
-                target = np.zeros((X.shape[0], X.shape[0], X.shape[1], X.shape[1]))
-            else:
-                target = np.zeros((X.shape[0], X2.shape[0], X.shape[1], X.shape[1]))
-        else: # diagonal covariance
-            if X2 is None:
-                target = np.zeros((X.shape[0], X.shape[0], X.shape[1]))
-            else:
-                target = np.zeros((X.shape[0], X2.shape[0], X.shape[1]))
-        [target.__iadd__(p.gradients_XX(dL_dK, X, X2, cov=cov)) for p in self.parts]
+    def gradients_XX(self, dL_dK, X, X2):
+        if X2 is None:
+            target = np.zeros((X.shape[0], X.shape[0], X.shape[1], X.shape[1]))
+        else:
+            target = np.zeros((X.shape[0], X2.shape[0], X.shape[1], X.shape[1]))
+        #else: # diagonal covariance
+        #    if X2 is None:
+        #        target = np.zeros((X.shape[0], X.shape[0], X.shape[1]))
+        #    else:
+        #        target = np.zeros((X.shape[0], X2.shape[0], X.shape[1]))
+        [target.__iadd__(p.gradients_XX(dL_dK, X, X2)) for p in self.parts]
         return target
 
-    def gradients_XX_diag(self, dL_dKdiag, X, cov=True):
+    def gradients_XX_diag(self, dL_dKdiag, X):
         target = np.zeros(X.shape+(X.shape[1],))
-        [target.__iadd__(p.gradients_XX_diag(dL_dKdiag, X, cov=cov)) for p in self.parts]
+        [target.__iadd__(p.gradients_XX_diag(dL_dKdiag, X)) for p in self.parts]
         return target
 
     @Cache_this(limit=3, force_kwargs=['which_parts'])
@@ -188,7 +187,7 @@ class Add(CombinationKernel):
 
     def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
         tmp = dL_dpsi2.sum(0)+ dL_dpsi2.sum(1) if len(dL_dpsi2.shape)==2 else dL_dpsi2.sum(2)+ dL_dpsi2.sum(1)
-        
+        if not self._exact_psicomp: return Kern.update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior)
         from .static import White, Bias
         for p1 in self.parts:
@@ -200,9 +199,9 @@ class Add(CombinationKernel):
                 if isinstance(p2, White):
                     continue
                 elif isinstance(p2, Bias):
-                    eff_dL_dpsi1 += tmp * p2.variance 
+                    eff_dL_dpsi1 += tmp * p2.variance
                 else:# np.setdiff1d(p1._all_dims_active, ar2, assume_unique): # TODO: Careful, not correct for overlapping _all_dims_active
-                    eff_dL_dpsi1 += tmp * p2.psi1(Z, variational_posterior) 
+                    eff_dL_dpsi1 += tmp * p2.psi1(Z, variational_posterior)
             p1.update_gradients_expectations(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
 
     def gradients_Z_expectations(self, dL_psi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
@@ -219,7 +218,7 @@ class Add(CombinationKernel):
                 if isinstance(p2, White):
                     continue
                 elif isinstance(p2, Bias):
-                    eff_dL_dpsi1 += tmp * p2.variance 
+                    eff_dL_dpsi1 += tmp * p2.variance
                 else:
                     eff_dL_dpsi1 += tmp * p2.psi1(Z, variational_posterior)
             target += p1.gradients_Z_expectations(dL_psi0, eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
@@ -227,7 +226,7 @@ class Add(CombinationKernel):
     def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
         tmp = dL_dpsi2.sum(0)+ dL_dpsi2.sum(1) if len(dL_dpsi2.shape)==2 else dL_dpsi2.sum(2)+ dL_dpsi2.sum(1)
-        
+        if not self._exact_psicomp: return Kern.gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior)
         from .static import White, Bias
         target_grads = [np.zeros(v.shape) for v in variational_posterior.parameters]
@@ -240,9 +239,9 @@ class Add(CombinationKernel):
                 if isinstance(p2, White):
                     continue
                 elif isinstance(p2, Bias):
-                    eff_dL_dpsi1 += tmp * p2.variance 
+                    eff_dL_dpsi1 += tmp * p2.variance
                 else:
-                    eff_dL_dpsi1 += tmp * p2.psi1(Z, variational_posterior) 
+                    eff_dL_dpsi1 += tmp * p2.psi1(Z, variational_posterior)
             grads = p1.gradients_qX_expectations(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
             [np.add(target_grads[i],grads[i],target_grads[i]) for i in range(len(grads))]
         return target_grads
@@ -255,7 +254,7 @@ class Add(CombinationKernel):
     #            other.unlink_parameter(p)
     #        parts.extend(other.parts)
     #        #self.link_parameters(*other_params)
-    #        
+    #
     #    else:
     #        #self.link_parameter(other)
     #        parts.append(other)
@@ -271,7 +270,7 @@ class Add(CombinationKernel):
         else:
             return super(Add, self).input_sensitivity(summarize)
-    
+
     def sde_update_gradient_full(self, gradients):
         """
         Update gradient in the order in which parameters are represented in the
@@ -283,12 +282,12 @@ class Add(CombinationKernel):
             part_param_num = len(p.param_array) # number of parameters in the part
             p.sde_update_gradient_full(gradients[part_start_param_index:(part_start_param_index+part_param_num)])
             part_start_param_index += part_param_num
-    
+
     def sde(self):
         """
         Support adding kernels for sde representation
         """
-        
+
         import scipy.linalg as la
 
         F = None
@@ -312,51 +311,51 @@ class Add(CombinationKernel):
             L = la.block_diag(L,Lt) if (L is not None) else Lt
             Qc = la.block_diag(Qc,Qct) if (Qc is not None) else Qct
             H = np.hstack((H,Ht)) if (H is not None) else Ht
-            
+
             Pinf = la.block_diag(Pinf,Pinft) if (Pinf is not None) else Pinft
             P0 = la.block_diag(P0,P0t) if (P0 is not None) else P0t
-            
+
             if dF is not None:
                 dF = np.pad(dF,((0,dFt.shape[0]),(0,dFt.shape[1]),(0,dFt.shape[2])), 'constant', constant_values=0)
                 dF[-dFt.shape[0]:,-dFt.shape[1]:,-dFt.shape[2]:] = dFt
             else:
                 dF = dFt
-            
+
             if dQc is not None:
                 dQc = np.pad(dQc,((0,dQct.shape[0]),(0,dQct.shape[1]),(0,dQct.shape[2])), 'constant', constant_values=0)
                 dQc[-dQct.shape[0]:,-dQct.shape[1]:,-dQct.shape[2]:] = dQct
             else:
                 dQc = dQct
-            
+
             if dPinf is not None:
                 dPinf = np.pad(dPinf,((0,dPinft.shape[0]),(0,dPinft.shape[1]),(0,dPinft.shape[2])), 'constant', constant_values=0)
                 dPinf[-dPinft.shape[0]:,-dPinft.shape[1]:,-dPinft.shape[2]:] = dPinft
             else:
                 dPinf = dPinft
-            
+
             if dP0 is not None:
                 dP0 = np.pad(dP0,((0,dP0t.shape[0]),(0,dP0t.shape[1]),(0,dP0t.shape[2])), 'constant', constant_values=0)
                 dP0[-dP0t.shape[0]:,-dP0t.shape[1]:,-dP0t.shape[2]:] = dP0t
             else:
                 dP0 = dP0t
-            
+
             n += Ft.shape[0]
             nq += Qct.shape[0]
             nd += dFt.shape[2]
-        
+
         assert (F.shape[0] == n and F.shape[1]==n), "SDE add: Check of F Dimensions failed"
         assert (L.shape[0] == n and L.shape[1]==nq), "SDE add: Check of L Dimensions failed"
         assert (Qc.shape[0] == nq and Qc.shape[1]==nq), "SDE add: Check of Qc Dimensions failed"
         assert (H.shape[0] == 1 and H.shape[1]==n), "SDE add: Check of H Dimensions failed"
         assert (Pinf.shape[0] == n and Pinf.shape[1]==n), "SDE add: Check of Pinf Dimensions failed"
-        assert (P0.shape[0] == n and P0.shape[1]==n), "SDE add: Check of P0 Dimensions failed" 
+        assert (P0.shape[0] == n and P0.shape[1]==n), "SDE add: Check of P0 Dimensions failed"
         assert (dF.shape[0] == n and dF.shape[1]==n and dF.shape[2]==nd), "SDE add: Check of dF Dimensions failed"
         assert (dQc.shape[0] == nq and dQc.shape[1]==nq and dQc.shape[2]==nd), "SDE add: Check of dQc Dimensions failed"
         assert (dPinf.shape[0] == n and dPinf.shape[1]==n and dPinf.shape[2]==nd), "SDE add: Check of dPinf Dimensions failed"
         assert (dP0.shape[0] == n and dP0.shape[1]==n and dP0.shape[2]==nd), "SDE add: Check of dP0 Dimensions failed"
-        
+
         return (F,L,Qc,H,Pinf,P0,dF,dQc,dPinf,dP0)
diff --git a/GPy/kern/src/stationary.py b/GPy/kern/src/stationary.py
index 22613fa2..141a1347 100644
--- a/GPy/kern/src/stationary.py
+++ b/GPy/kern/src/stationary.py
@@ -266,11 +266,11 @@ class Stationary(Kern):
         ..returns:
             dL2_dXdX: [NxQxQ]
         """
-        dL_dK_diag = dL_dK_diag.reshape(-1, 1, 1)
+        dL_dK_diag = dL_dK_diag.copy().reshape(-1, 1, 1)
         assert dL_dK_diag.size == X.shape[0], "dL_dK_diag has to be given as row [N] or column vector [Nx1]"
 
-        l2 = np.ones(X.shape[1])*self.lengthscale**2
-        return (dL_dK_diag * self.variance/(l2[:,None]*l2[None,:]))# np.zeros(X.shape+(X.shape[1],))
+        l4 = np.ones(X.shape[1])*self.lengthscale**2
+        return dL_dK_diag * (np.eye(X.shape[1]) * self.variance/(l4))[None, :,:]# np.zeros(X.shape+(X.shape[1],))
         #return np.ones(X.shape) * d2L_dK * self.variance/self.lengthscale**2 # np.zeros(X.shape)
 
     def _gradients_X_pure(self, dL_dK, X, X2=None):
diff --git a/GPy/testing/kernel_tests.py b/GPy/testing/kernel_tests.py
index 9971e6d6..99951eb1 100644
--- a/GPy/testing/kernel_tests.py
+++ b/GPy/testing/kernel_tests.py
@@ -135,10 +135,13 @@ class Kern_check_d2Kdiag_dXdX(Kern_check_model):
         self.Xc = X.copy()
 
     def log_likelihood(self):
-        return np.sum(self.kernel.gradients_X_diag(self.dL_dK.diagonal(), self.X))
+        l = 0.
+        for i in range(self.X.shape[0]):
+            l += self.kernel.gradients_X(self.dL_dK[[i],[i]], self.X[[i]], self.Xc[[i]]).sum()
+        return l
 
     def parameters_changed(self):
-        grads = self.kernel.gradients_XX_diag(self.dL_dK.diagonal(), self.X)
+        grads = -self.kernel.gradients_XX_diag(self.dL_dK.diagonal(), self.X)
         self.X.gradient[:] = grads.sum(-1)
 
 def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verbose=False, fixed_X_dims=None):
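
Notes (not part of the patch). The three Python sketches below were written for this review to illustrate what the hunks above change; they are not code from the repository. They assume numpy plus a GPy checkout with this patch applied, and every shape, helper name, and number in them is invented for illustration.

First, the shape contract implied by the new Add.gradients_XX / Add.gradients_XX_diag signatures: with the cov keyword gone, gradients_XX always returns the full [N x M x Q x Q] block of second derivatives (one QxQ block per pair of input rows), and gradients_XX_diag returns the [N x Q x Q] diagonal blocks.

import numpy as np
import GPy

N, M, Q = 5, 3, 2
X = np.random.randn(N, Q)
X2 = np.random.randn(M, Q)
k = GPy.kern.RBF(Q) + GPy.kern.Matern32(Q)   # '+' builds an Add kernel

# Cross second derivatives: one QxQ block per pair of rows of X and X2.
print(k.gradients_XX(np.ones((N, M)), X, X2).shape)    # (5, 3, 2, 2)
print(k.gradients_XX(np.ones((N, N)), X, None).shape)  # (5, 5, 2, 2) when X2 is None
# Diagonal variant: one QxQ block per row of X.
print(k.gradients_XX_diag(np.ones(N), X).shape)        # (5, 2, 2)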
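
Second, the kernel_tests.py hunk rewrites Kern_check_d2Kdiag_dXdX so that log_likelihood evaluates sum_i dL_dK[i,i] * dK(x_i, xc_i)/dx_i with the copy Xc held fixed, and parameters_changed reports -gradients_XX_diag(...).sum(-1) as its gradient. A hand-rolled version of the same finite-difference comparison (the helper ll and all sizes are made up; in GPy the comparison is driven by GradientChecker instead):

import numpy as np
import GPy

def ll(kern, X, Xc, dL_dK):
    # The quantity the new log_likelihood computes: per-point gradients_X
    # evaluated against a fixed copy Xc of the inputs.
    return sum(kern.gradients_X(dL_dK[[i], [i]], X[[i]], Xc[[i]]).sum()
               for i in range(X.shape[0]))

N, Q, eps = 4, 3, 1e-6
X = np.random.randn(N, Q)
dL_dK = np.ones((N, N))
kern = GPy.kern.RBF(Q)

# Analytic gradient, including the sign flip introduced in parameters_changed.
analytic = -kern.gradients_XX_diag(dL_dK.diagonal(), X).sum(-1)

# Central finite differences of ll w.r.t. X, holding Xc at the original X.
numeric = np.zeros_like(X)
for n in range(N):
    for q in range(Q):
        Xp, Xm = X.copy(), X.copy()
        Xp[n, q] += eps
        Xm[n, q] -= eps
        numeric[n, q] = (ll(kern, Xp, X, dL_dK) - ll(kern, Xm, X, dL_dK)) / (2 * eps)

print(np.allclose(analytic, numeric, atol=1e-4))  # expected: True

Since dL_dK.diagonal() is a read-only view in recent numpy, the .copy() added in the stationary.py hunk is presumably what keeps this call path from tripping over that view.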
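
Third, the rewritten Stationary.gradients_XX_diag is diagonal across input dimensions: per point n it returns dL_dK_diag[n] * diag(variance / lengthscale**2), replacing the previous dense l2[:,None]*l2[None,:] scaling (note the variable is named l4 but holds lengthscale**2). A direct check of that closed form for an ARD RBF, with arbitrary concrete numbers:

import numpy as np
import GPy

N, Q = 4, 3
X = np.random.randn(N, Q)
dL = np.random.randn(N)
ls = np.array([0.5, 1.0, 2.0])
k = GPy.kern.RBF(Q, variance=2.0, lengthscale=ls, ARD=True)

got = k.gradients_XX_diag(dL, X)   # shape (N, Q, Q)
# The closed form the new return line implements.
expected = dL[:, None, None] * (np.eye(Q) * 2.0 / ls**2)[None, :, :]
print(np.allclose(got, expected))  # expected: True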