mirror of https://github.com/SheffieldML/GPy.git
synced 2026-05-07 02:52:40 +02:00
[basisfuncs] updated kernel to better reflect linear trends and added ARD support
This commit is contained in:
parent 440d7b6478
commit d428465e08
2 changed files with 58 additions and 21 deletions
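For orientation before the diff: a minimal usage sketch of the new ARD flag, assuming the usual top-level GPy API (GPy.kern.LinearSlopeBasisFuncKernel, GPy.models.GPRegression) and made-up data. With ARD=True the kernel gets one variance per column of the design matrix phi rather than a single shared variance:

    import numpy as np
    import GPy

    X = np.linspace(0, 10, 50)[:, None]
    Y = np.where(X < 5, -1., 1.) + 0.1 * np.random.randn(50, 1)

    # One variance per basis function instead of one shared variance.
    k = GPy.kern.LinearSlopeBasisFuncKernel(1, start=2., stop=8., ARD=True)
    m = GPy.models.GPRegression(X, Y, k)
    m.optimize()
    print(k.variance)  # two entries here: offset column and clamped-linear column

Here phi has two columns (the offset prepended by the new concatenate_offset helper plus the clamped-linear segment), so k.variance ends up with two entries.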
@@ -5,33 +5,59 @@ from ...core.parameterization.param import Param
 from ...core.parameterization.transformations import Logexp
 import numpy as np
 from ...util.caching import Cache_this
-from ...util.linalg import tdot
+from ...util.linalg import tdot, mdot


 class BasisFuncKernel(Kern):
-    def __init__(self, input_dim, variance=1., active_dims=None, name='basis func kernel'):
+    def __init__(self, input_dim, variance=1., active_dims=None, ARD=False, name='basis func kernel'):
+        """
+        Abstract superclass for kernels with explicit basis functions for use in GPy.
+
+        This class does NOT automatically add an offset to the design matrix phi!
+        """
         super(BasisFuncKernel, self).__init__(input_dim, active_dims, name)
+        self.ARD = ARD
+        if self.ARD:
+            phi_test = self._phi(np.random.normal(0, 1, (1, self.input_dim)))
+            variance = variance * np.ones(phi_test.shape[1])
+        else:
+            variance = np.array(variance)
         self.variance = Param('variance', variance, Logexp())
         self.link_parameter(self.variance)

+    def parameters_changed(self):
+        self.alpha = np.sqrt(self.variance)
+        #self.beta = 1./self.variance
+
     @Cache_this(limit=3, ignore_args=())
     def phi(self, X):
-        raise NotImplementedError('Overwrite this phi function, which maps the input X into the higher dimensional space and forms the design matrix Phi')
+        return self._phi(X)
+
+    def _phi(self, X):
+        raise NotImplementedError('Overwrite this _phi function, which maps the input X into the higher dimensional space and returns the design matrix Phi')

     def K(self, X, X2=None):
-        return self.variance * self._K(X, X2)
+        return self._K(X, X2)

     def Kdiag(self, X, X2=None):
-        return self.variance * np.diag(self._K(X, X2))
+        return np.diag(self._K(X, X2))

     def update_gradients_full(self, dL_dK, X, X2=None):
-        self.variance.gradient = np.einsum('ij,ij', dL_dK, self._K(X, X2))
+        if self.ARD:
+            phi1 = self.phi(X)
+            if X2 is None or X is X2:
+                self.variance.gradient = np.einsum('ij,iq,jq->q', dL_dK, phi1, phi1)
+            else:
+                phi2 = self.phi(X2)
+                self.variance.gradient = np.einsum('ij,iq,jq->q', dL_dK, phi1, phi2)
+        else:
+            self.variance.gradient = np.einsum('ij,ij', dL_dK, self._K(X, X2))

     def update_gradients_diag(self, dL_dKdiag, X):
-        self.variance.gradient = np.einsum('i,i', dL_dKdiag, self._K(X))
+        if self.ARD:
+            phi1 = self.phi(X)
+            self.variance.gradient = np.einsum('i,iq,iq->q', dL_dKdiag, phi1, phi1)
+        else:
+            self.variance.gradient = np.einsum('i,i', dL_dKdiag, self.Kdiag(X))
+
+    def concatenate_offset(self, X):
+        return np.c_[np.ones((X.shape[0], 1)), X]
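Why the new einsum in update_gradients_full is the right contraction: with K = sum_q v_q phi[:, q] phi[:, q]^T, the chain rule gives dL/dv_q = sum_ij dL_dK[i, j] phi[i, q] phi[j, q], which is exactly 'ij,iq,jq->q'. A self-contained check with made-up shapes:

    import numpy as np

    n, Q = 5, 3
    rng = np.random.RandomState(0)
    phi = rng.randn(n, Q)      # design matrix, one basis function per column
    dL_dK = rng.randn(n, n)

    # Per-basis-function gradient, as in the ARD branch above
    grad = np.einsum('ij,iq,jq->q', dL_dK, phi, phi)

    # Reference: differentiate K = sum_q v_q * outer(phi[:, q], phi[:, q]) term by term
    ref = np.array([np.sum(dL_dK * np.outer(phi[:, q], phi[:, q])) for q in range(Q)])
    assert np.allclose(grad, ref)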
@@ -52,19 +78,19 @@ class BasisFuncKernel(Kern):
             posterior = self._highest_parent_.posterior
         except NameError:
             raise RuntimeError("This kernel is not part of a model and cannot be used for posterior inference")
-        phi = self.phi(X)
-        return self.variance * phi.T.dot(posterior.woodbury_vector), self.variance * (1 - self.variance * phi.T.dot(posterior.woodbury_inv.dot(phi)))
+        phi_alpha = self.phi(X) * self.variance
+        return (phi_alpha).T.dot(posterior.woodbury_vector), (np.eye(phi_alpha.shape[1])*self.variance - mdot(phi_alpha.T, posterior.woodbury_inv, phi_alpha))

     @Cache_this(limit=3, ignore_args=())
     def _K(self, X, X2):
         if X2 is None or X is X2:
-            phi = self.phi(X)
+            phi = self.phi(X) * self.alpha
             if phi.ndim != 2:
                 phi = phi[:, None]
             return tdot(phi)
         else:
-            phi1 = self.phi(X)
-            phi2 = self.phi(X2)
+            phi1 = self.phi(X) * self.alpha
+            phi2 = self.phi(X2) * self.alpha
             if phi1.ndim != 2:
                 phi1 = phi1[:, None]
                 phi2 = phi2[:, None]
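The alpha set in parameters_changed is sqrt(variance), folded column-wise into phi inside _K, so tdot(phi * alpha) already equals phi diag(variance) phi^T. That is why K and Kdiag no longer multiply by the variance themselves, and why the same code covers the ARD case. A quick check of the identity, independent of GPy:

    import numpy as np

    rng = np.random.RandomState(1)
    phi = rng.randn(6, 3)
    variance = np.array([0.5, 2.0, 1.5])  # one variance per basis function (ARD)
    alpha = np.sqrt(variance)

    K_folded = (phi * alpha).dot((phi * alpha).T)     # what _K computes via tdot
    K_direct = phi.dot(np.diag(variance)).dot(phi.T)  # phi diag(v) phi^T
    assert np.allclose(K_folded, K_direct)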
@@ -72,30 +98,43 @@ class BasisFuncKernel(Kern):


 class LinearSlopeBasisFuncKernel(BasisFuncKernel):
-    def __init__(self, input_dim, start, stop, variance=1., active_dims=None, name='linear_segment'):
-        super(LinearSlopeBasisFuncKernel, self).__init__(input_dim, variance, active_dims, name)
+    def __init__(self, input_dim, start, stop, variance=1., active_dims=None, ARD=False, name='linear_segment'):
+        """
+        A linear segment transformation. The segments are constant before
+        start, linear between start and stop, and constant again after stop.
+        The segments are normalized, so that they have exactly as much mass
+        above the origin as below it.
+
+        start and stop can be tuples or lists of starts and stops.
+        They behave as np.where(X < start) does.
+        """
         self.start = np.array(start)
         self.stop = np.array(stop)
+        super(LinearSlopeBasisFuncKernel, self).__init__(input_dim, variance, active_dims, ARD, name)

     @Cache_this(limit=3, ignore_args=())
-    def phi(self, X):
+    def _phi(self, X):
         phi = np.where(X < self.start, self.start, X)
         phi = np.where(phi > self.stop, self.stop, phi)
-        return ((phi-(self.stop+self.start)/2.))#/(.5*(self.stop-self.start)))-1.
+        return self.concatenate_offset(phi)  # ((phi-self.start)/(self.stop-self.start))-.5
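To see what the new _phi produces: X is clamped to [start, stop], giving a column that is constant before start, linear in between and constant after stop, with an all-ones offset column prepended by concatenate_offset. The same transformation in plain numpy (values are made up):

    import numpy as np

    start, stop = 2., 8.
    X = np.linspace(0., 10., 6)[:, None]

    phi = np.where(X < start, start, X)         # clamp below start
    phi = np.where(phi > stop, stop, phi)       # clamp above stop
    phi = np.c_[np.ones((X.shape[0], 1)), phi]  # prepend the offset column
    print(phi)  # column 0: offset, column 1: constant-linear-constant segment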

 class ChangePointBasisFuncKernel(BasisFuncKernel):
-    def __init__(self, input_dim, changepoint, variance=1., active_dims=None, name='changepoint'):
-        super(ChangePointBasisFuncKernel, self).__init__(input_dim, variance, active_dims, name)
+    def __init__(self, input_dim, changepoint, variance=1., active_dims=None, ARD=False, name='changepoint'):
         self.changepoint = changepoint
+        super(ChangePointBasisFuncKernel, self).__init__(input_dim, variance, active_dims, ARD, name)

     @Cache_this(limit=3, ignore_args=())
-    def phi(self, X):
-        return np.where((X < self.changepoint), -1, 1)
+    def _phi(self, X):
+        return self.concatenate_offset(np.where((X < self.changepoint), -1, 1))
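The changepoint basis is simply a -1/+1 sign around the changepoint plus the offset column, so the posterior mean can sit at a different constant level on each side. In plain numpy:

    import numpy as np

    changepoint = 5.
    X = np.linspace(0., 10., 6)[:, None]
    phi = np.c_[np.ones((X.shape[0], 1)), np.where(X < changepoint, -1, 1)]
    print(phi)  # offset column plus a -1/+1 step at the changepoint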

 class DomainKernel(LinearSlopeBasisFuncKernel):
-    def __init__(self, input_dim, start, stop, variance=1., active_dims=None, name='constant_domain'):
-        super(DomainKernel, self).__init__(input_dim, start, stop, variance, active_dims, name)
+    def __init__(self, input_dim, start, stop, variance=1., active_dims=None, ARD=False, name='constant_domain'):
+        super(DomainKernel, self).__init__(input_dim, start, stop, variance, active_dims, ARD, name)

     @Cache_this(limit=3, ignore_args=())
-    def phi(self, X):
+    def _phi(self, X):
         phi = np.where((X>self.start)*(X<self.stop), 1., 0.)
-        return phi#((phi-self.start)/(self.stop-self.start))-.5
+        return self.concatenate_offset(phi)  # ((phi-self.start)/(self.stop-self.start))-.5
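DomainKernel reuses the LinearSlopeBasisFuncKernel constructor but swaps in an indicator basis: 1 strictly inside (start, stop) and 0 outside, again with the offset column prepended. In plain numpy:

    import numpy as np

    start, stop = 2., 8.
    X = np.linspace(0., 10., 6)[:, None]
    phi = np.where((X > start) * (X < stop), 1., 0.)  # indicator of the open interval
    phi = np.c_[np.ones((X.shape[0], 1)), phi]        # prepend the offset column
    print(phi)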
@@ -68,8 +68,6 @@ class Periodic(Kern):
         return np.diag(self.K(X))
-
-


 class PeriodicExponential(Periodic):
     """
     Kernel of the periodic subspace (up to a given frequency) of an exponential