[dxxdiag] some steps towards the diagonal gradients in xx

2026-06-11 15:15:15 +02:00 · 2016-06-08 10:22:36 +01:00 · 2016-06-08 10:22:36 +01:00 · 787168a394
commit 787168a394
parent a3f458926b
4 changed files with 42 additions and 40 deletions
--- a/GPy/core/gp.py
+++ b/GPy/core/gp.py
@ -377,7 +377,7 @@ class GP(Model):
        if full_cov:
            dK2_dXdX = kern.gradients_XX(one, Xnew)
        else:
-            dK2_dXdX = -kern.gradients_XX(one, Xnew).sum(0)
+            dK2_dXdX = kern.gradients_XX(one, Xnew).sum(0)
            #dK2_dXdX = np.zeros((Xnew.shape[0], Xnew.shape[1], Xnew.shape[1]))
            #for i in range(Xnew.shape[0]):
            #    dK2_dXdX[i:i+1,:,:] = kern.gradients_XX(one, Xnew[i:i+1,:])
--- a/GPy/kern/src/add.py
+++ b/GPy/kern/src/add.py
@ -85,23 +85,22 @@ class Add(CombinationKernel):
        [target.__iadd__(p.gradients_X_diag(dL_dKdiag, X)) for p in self.parts]
        return target

-    def gradients_XX(self, dL_dK, X, X2, cov=True):
-        if cov: # full covarance
-            if X2 is None:
-                target = np.zeros((X.shape[0], X.shape[0], X.shape[1], X.shape[1]))
-            else:
-                target = np.zeros((X.shape[0], X2.shape[0], X.shape[1], X.shape[1]))
-        else: # diagonal covariance
-            if X2 is None:
-                target = np.zeros((X.shape[0], X.shape[0], X.shape[1]))
-            else:
-                target = np.zeros((X.shape[0], X2.shape[0], X.shape[1]))
-        [target.__iadd__(p.gradients_XX(dL_dK, X, X2, cov=cov)) for p in self.parts]
+    def gradients_XX(self, dL_dK, X, X2=None):
+        """
+        Sum the gradients_XX results of every part of this kernel.
+
+        The diagonal-only variant is provided by gradients_XX_diag.
+
+        :param dL_dK: passed unchanged to each part's gradients_XX
+        :param X: first set of inputs; X.shape[0] and X.shape[1] size the result
+        :param X2: second set of inputs, or None to pair X with itself
+        :returns: array of shape (X.shape[0], M, X.shape[1], X.shape[1]) where
+            M is X.shape[0] when X2 is None and X2.shape[0] otherwise
+        """
+        # X2 defaults to None so that two-argument calls such as
+        # kern.gradients_XX(one, Xnew) also work on a sum kernel.
+        num_cols = X.shape[0] if X2 is None else X2.shape[0]
+        target = np.zeros((X.shape[0], num_cols, X.shape[1], X.shape[1]))
+        for p in self.parts:
+            # accumulate in place; each part's result must broadcast to target
+            target += p.gradients_XX(dL_dK, X, X2)
        return target

-    def gradients_XX_diag(self, dL_dKdiag, X, cov=True):
+    def gradients_XX_diag(self, dL_dKdiag, X):
+        """
+        Sum the gradients_XX_diag results of every part of this kernel.
+
+        :param dL_dKdiag: passed unchanged to each part's gradients_XX_diag
+        :param X: inputs; the result has shape X.shape + (X.shape[1],)
+        :returns: the accumulated array
+        """
        target = np.zeros(X.shape+(X.shape[1],))
-        [target.__iadd__(p.gradients_XX_diag(dL_dKdiag, X, cov=cov)) for p in self.parts]
+        for p in self.parts:
+            # accumulate in place instead of building a throwaway list
+            target += p.gradients_XX_diag(dL_dKdiag, X)
        return target

    @Cache_this(limit=3, force_kwargs=['which_parts'])
@ -188,7 +187,7 @@ class Add(CombinationKernel):

    def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
        tmp = dL_dpsi2.sum(0)+ dL_dpsi2.sum(1) if len(dL_dpsi2.shape)==2 else dL_dpsi2.sum(2)+ dL_dpsi2.sum(1)
-        
+
        if not self._exact_psicomp: return Kern.update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior)
        from .static import White, Bias
        for p1 in self.parts:
@ -200,9 +199,9 @@ class Add(CombinationKernel):
                if isinstance(p2, White):
                    continue
                elif isinstance(p2, Bias):
-                    eff_dL_dpsi1 += tmp * p2.variance 
+                    eff_dL_dpsi1 += tmp * p2.variance
                else:# np.setdiff1d(p1._all_dims_active, ar2, assume_unique): # TODO: Careful, not correct for overlapping _all_dims_active
-                    eff_dL_dpsi1 += tmp * p2.psi1(Z, variational_posterior) 
+                    eff_dL_dpsi1 += tmp * p2.psi1(Z, variational_posterior)
            p1.update_gradients_expectations(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)

    def gradients_Z_expectations(self, dL_psi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
@ -219,7 +218,7 @@ class Add(CombinationKernel):
                if isinstance(p2, White):
                    continue
                elif isinstance(p2, Bias):
-                    eff_dL_dpsi1 += tmp * p2.variance 
+                    eff_dL_dpsi1 += tmp * p2.variance
                else:
                    eff_dL_dpsi1 += tmp * p2.psi1(Z, variational_posterior)
            target += p1.gradients_Z_expectations(dL_psi0, eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
@ -227,7 +226,7 @@ class Add(CombinationKernel):

    def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
        tmp = dL_dpsi2.sum(0)+ dL_dpsi2.sum(1) if len(dL_dpsi2.shape)==2 else dL_dpsi2.sum(2)+ dL_dpsi2.sum(1)
-        
+
        if not self._exact_psicomp: return Kern.gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior)
        from .static import White, Bias
        target_grads = [np.zeros(v.shape) for v in variational_posterior.parameters]
@ -240,9 +239,9 @@ class Add(CombinationKernel):
                if isinstance(p2, White):
                    continue
                elif isinstance(p2, Bias):
-                    eff_dL_dpsi1 += tmp * p2.variance 
+                    eff_dL_dpsi1 += tmp * p2.variance
                else:
-                    eff_dL_dpsi1 += tmp * p2.psi1(Z, variational_posterior) 
+                    eff_dL_dpsi1 += tmp * p2.psi1(Z, variational_posterior)
            grads = p1.gradients_qX_expectations(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
            [np.add(target_grads[i],grads[i],target_grads[i]) for i in range(len(grads))]
        return target_grads
@ -255,7 +254,7 @@ class Add(CombinationKernel):
    #            other.unlink_parameter(p)
    #        parts.extend(other.parts)
    #        #self.link_parameters(*other_params)
-    #        
+    #
    #    else:
    #        #self.link_parameter(other)
    #        parts.append(other)
@ -271,7 +270,7 @@ class Add(CombinationKernel):
        else:

            return super(Add, self).input_sensitivity(summarize)
-            
+
    def sde_update_gradient_full(self, gradients):
        """
        Update gradient in the order in which parameters are represented in the
@ -283,12 +282,12 @@ class Add(CombinationKernel):
                part_param_num = len(p.param_array) # number of parameters in the part
                p.sde_update_gradient_full(gradients[part_start_param_index:(part_start_param_index+part_param_num)])
                part_start_param_index += part_param_num
-    
+
    def sde(self):
        """
        Support adding kernels for sde representation
        """
-        
+
        import scipy.linalg as la

        F     = None
@ -312,51 +311,51 @@ class Add(CombinationKernel):
            L = la.block_diag(L,Lt) if (L is not None) else Lt
            Qc = la.block_diag(Qc,Qct) if (Qc is not None) else Qct
            H = np.hstack((H,Ht)) if (H is not None) else Ht
-             
+
            Pinf = la.block_diag(Pinf,Pinft) if (Pinf is not None) else Pinft
            P0 = la.block_diag(P0,P0t) if (P0 is not None) else P0t
-            
+
            if dF is not None:
                dF = np.pad(dF,((0,dFt.shape[0]),(0,dFt.shape[1]),(0,dFt.shape[2])),
                        'constant', constant_values=0)
                dF[-dFt.shape[0]:,-dFt.shape[1]:,-dFt.shape[2]:] = dFt
            else:
                dF = dFt
-             
+
            if dQc is not None:
                dQc = np.pad(dQc,((0,dQct.shape[0]),(0,dQct.shape[1]),(0,dQct.shape[2])),
                        'constant', constant_values=0)
                dQc[-dQct.shape[0]:,-dQct.shape[1]:,-dQct.shape[2]:] = dQct
            else:
                dQc = dQct
-             
+
            if dPinf is not None:
                dPinf = np.pad(dPinf,((0,dPinft.shape[0]),(0,dPinft.shape[1]),(0,dPinft.shape[2])),
                        'constant', constant_values=0)
                dPinf[-dPinft.shape[0]:,-dPinft.shape[1]:,-dPinft.shape[2]:] = dPinft
            else:
                dPinf = dPinft
-                
+
            if dP0 is not None:
                dP0 = np.pad(dP0,((0,dP0t.shape[0]),(0,dP0t.shape[1]),(0,dP0t.shape[2])),
                        'constant', constant_values=0)
                dP0[-dP0t.shape[0]:,-dP0t.shape[1]:,-dP0t.shape[2]:] = dP0t
            else:
                dP0 = dP0t
-                
+
            n += Ft.shape[0]
            nq += Qct.shape[0]
            nd += dFt.shape[2]
-        
+
        assert (F.shape[0] == n and F.shape[1]==n), "SDE add: Check of F Dimensions failed"
        assert (L.shape[0] == n and L.shape[1]==nq), "SDE add: Check of L Dimensions failed"
        assert (Qc.shape[0] == nq and Qc.shape[1]==nq), "SDE add: Check of Qc Dimensions failed"
        assert (H.shape[0] == 1 and H.shape[1]==n), "SDE add: Check of H Dimensions failed"
        assert (Pinf.shape[0] == n and Pinf.shape[1]==n), "SDE add: Check of Pinf Dimensions failed"
-        assert (P0.shape[0] == n and P0.shape[1]==n), "SDE add: Check of P0 Dimensions failed"        
+        assert (P0.shape[0] == n and P0.shape[1]==n), "SDE add: Check of P0 Dimensions failed"
        assert (dF.shape[0] == n and dF.shape[1]==n and dF.shape[2]==nd), "SDE add: Check of dF Dimensions failed"
        assert (dQc.shape[0] == nq and dQc.shape[1]==nq and dQc.shape[2]==nd), "SDE add: Check of dQc Dimensions failed"
        assert (dPinf.shape[0] == n and dPinf.shape[1]==n and dPinf.shape[2]==nd), "SDE add: Check of dPinf Dimensions failed"
        assert (dP0.shape[0] == n and dP0.shape[1]==n and dP0.shape[2]==nd), "SDE add: Check of dP0 Dimensions failed"
-        
+
        return (F,L,Qc,H,Pinf,P0,dF,dQc,dPinf,dP0)
--- a/GPy/kern/src/stationary.py
+++ b/GPy/kern/src/stationary.py
@ -266,11 +266,11 @@ class Stationary(Kern):
        ..returns:
            dL2_dXdX: [NxQxQ]
        """
-        dL_dK_diag = dL_dK_diag.reshape(-1, 1, 1)
+        dL_dK_diag = dL_dK_diag.copy().reshape(-1, 1, 1)
        assert dL_dK_diag.size == X.shape[0], "dL_dK_diag has to be given as row [N] or column vector [Nx1]"

-        l2 =  np.ones(X.shape[1])*self.lengthscale**2
-        return (dL_dK_diag * self.variance/(l2[:,None]*l2[None,:]))# np.zeros(X.shape+(X.shape[1],))
+        l4 =  np.ones(X.shape[1])*self.lengthscale**2
+        return dL_dK_diag * (np.eye(X.shape[1]) * self.variance/(l4))[None, :,:]# np.zeros(X.shape+(X.shape[1],))
        #return np.ones(X.shape) * d2L_dK * self.variance/self.lengthscale**2 # np.zeros(X.shape)

    def _gradients_X_pure(self, dL_dK, X, X2=None):
--- a/GPy/testing/kernel_tests.py
+++ b/GPy/testing/kernel_tests.py
@ -135,10 +135,13 @@ class Kern_check_d2Kdiag_dXdX(Kern_check_model):
        self.Xc = X.copy()

    def log_likelihood(self):
-        return np.sum(self.kernel.gradients_X_diag(self.dL_dK.diagonal(), self.X))
+        # For every row i, call gradients_X on row i of X paired with row i of
+        # the copy Xc, passing the i-th diagonal entry of dL_dK, and add up the
+        # summed results. The float start value keeps the result a float even
+        # when X has no rows.
+        n_points = self.X.shape[0]
+        return sum(
+            (self.kernel.gradients_X(self.dL_dK[[i], [i]], self.X[[i]], self.Xc[[i]]).sum()
+             for i in range(n_points)),
+            0.)

    def parameters_changed(self):
-        grads = self.kernel.gradients_XX_diag(self.dL_dK.diagonal(), self.X)
+        # Store in X.gradient the negated gradients_XX_diag result, summed over
+        # its last axis.
+        # NOTE(review): presumably negated so that it matches the derivative of
+        # log_likelihood w.r.t. X -- confirm.
+        grads = -self.kernel.gradients_XX_diag(self.dL_dK.diagonal(), self.X)
        self.X.gradient[:] = grads.sum(-1)

 def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verbose=False, fixed_X_dims=None):