fix: basis functions gradients pushed up a level in hierarchy

2026-07-26 17:11:06 +02:00 · 2018-10-12 08:54:13 +01:00 · 2018-10-12 08:54:13 +01:00 · 09bde76d5d
commit 09bde76d5d
parent 4f3047e035
1 changed files with 36 additions and 22 deletions
--- a/GPy/kern/src/basis_funcs.py
+++ b/GPy/kern/src/basis_funcs.py
@ -8,14 +8,15 @@ from paramz.caching import Cache_this
 from ...util.linalg import tdot, mdot
 class BasisFuncKernel(Kern):
-    def __init__(self, input_dim, variance=1., active_dims=None, ARD=False, name='basis func kernel'):
+    def __init__(self, input_dim, variance=1., active_dims=None, ARD=False, name='basis func kernel', _warn_input_dim=True):
        """
        Abstract superclass for kernels with explicit basis functions for use in GPy.
        This class does NOT automatically add an offset to the design matrix phi!
        """
        super(BasisFuncKernel, self).__init__(input_dim, active_dims, name)
-        assert self.input_dim==1, "Basis Function Kernel only implemented for one dimension. Use one kernel per dimension (and add them together) for more dimensions"
+        if _warn_input_dim:
            assert self.input_dim==1, "Basis Function Kernel only implemented for one dimension. Use one kernel per dimension (and add them together) for more dimensions"
        self.ARD = ARD
        if self.ARD:
            phi_test = self._phi(np.random.normal(0, 1, (1, self.input_dim)))
@ -42,6 +43,27 @@ class BasisFuncKernel(Kern):
    def Kdiag(self, X, X2=None):
        return np.diag(self._K(X, X2))
    def d_K_d_theta_through_K(self, phi1, dL_dK, dphi1_d_theta, phi2=None, dphi2_d_theta=None, ARD=True):
        """
        Helper to push inner parameter gradients through to K.
        Give phi(X) and the gradient of phi(X) wrt the parameter theta.
        Remember to set the gradient of the respective theta yourself! 
        """
        if phi2 is None or phi2 is phi1:
            if ARD:
                gradient = self.variance * 2 * np.einsum('ij,iq,jq->q', dL_dK, phi1, dphi1_d_theta)
            else:
                gradient = np.sum(self.variance * 2 * (dL_dK * phi1.dot(dphi1_d_theta.T)).sum())
        else:
            if ARD:
                gradient = (self.variance * np.einsum('ij,iq,jq->q', dL_dK, phi1, dphi2_d_theta) + np.einsum('ij,iq,jq->q', dL_dK, phi2, dphi1_d_theta))
            else:
                gradient = np.sum(self.variance * (dL_dK * phi1.dot(dphi2_d_theta.T)).sum() + (dL_dK * phi2.dot(dphi1_d_theta.T)).sum())
        return np.where(np.isnan(gradient), 0, gradient)
    def update_gradients_full(self, dL_dK, X, X2=None):
        if self.ARD:
            phi1 = self.phi(X)
@ -162,7 +184,7 @@ class ChangePointBasisFuncKernel(BasisFuncKernel):
 class DomainKernel(LinearSlopeBasisFuncKernel):
    """
-    Create a constant plateou of correlation between start and stop and zero
+    Create a constant plateau of correlation between start and stop and zero
    elsewhere. This is a constant shift of the outputs along the yaxis
    in the range from start to stop.
    """
@ -202,25 +224,17 @@ class LogisticBasisFuncKernel(BasisFuncKernel):
    def update_gradients_full(self, dL_dK, X, X2=None):
        super(LogisticBasisFuncKernel, self).update_gradients_full(dL_dK, X, X2)
-        if X2 is None or X is X2:
+        
-            phi1 = self.phi(X)
+        phi1 = self.phi(X)
-            if phi1.ndim != 2:
+        if phi1.ndim != 2:
-                phi1 = phi1[:, None]
+            phi1 = phi1[:, None]
-            dphi1_dl = (phi1**2) * (np.exp(-((X-self.centers)*self.slope)) * (X-self.centers))
+        dphi1_dl = (phi1**2) * (np.exp(-((X-self.centers)*self.slope)) * (X-self.centers))
-            if self.ARD_slope:
+        
-                self.slope.gradient = self.variance * 2 * np.einsum('ij,iq,jq->q', dL_dK, phi1, dphi1_dl)
+        phi2 = dphi2_dl = None
-            else:
+        if not(X2 is None or X is X2):
                self.slope.gradient = np.sum(self.variance * 2 * (dL_dK * phi1.dot(dphi1_dl.T)).sum())
        else:
            phi1 = self.phi(X)
            phi2 = self.phi(X2)
-            if phi1.ndim != 2:
+            if phi2.ndim != 2:
                phi1 = phi1[:, None]
                phi2 = phi2[:, None]
            dphi1_dl = (phi1**2) * (np.exp(-((X-self.centers)*self.slope)) * (X-self.centers))
            dphi2_dl = (phi2**2) * (np.exp(-((X2-self.centers)*self.slope)) * (X2-self.centers))
-            if self.ARD_slope:
+            
-                self.slope.gradient = (self.variance * np.einsum('ij,iq,jq->q', dL_dK, phi1, dphi2_dl) + np.einsum('ij,iq,jq->q', dL_dK, phi2, dphi1_dl))
+        self.slope.gradient = self.d_K_d_theta_through_K(phi1, dL_dK, dphi1_dl, phi2, dphi2_dl, ARD=self.ARD_slope)
            else:
                self.slope.gradient = np.sum(self.variance * (dL_dK * phi1.dot(dphi2_dl.T)).sum() + (dL_dK * phi2.dot(dphi1_dl.T)).sum())
        self.slope.gradient = np.where(np.isnan(self.slope.gradient), 0, self.slope.gradient)