diff --git a/GPy/kern/_src/basis_funcs.py b/GPy/kern/_src/basis_funcs.py
index 1b300661..af4a7188 100644
--- a/GPy/kern/_src/basis_funcs.py
+++ b/GPy/kern/_src/basis_funcs.py
@@ -5,33 +5,59 @@
 from ...core.parameterization.param import Param
 from ...core.parameterization.transformations import Logexp
 import numpy as np
 from ...util.caching import Cache_this
-from ...util.linalg import tdot
+from ...util.linalg import tdot, mdot
 
 class BasisFuncKernel(Kern):
-    def __init__(self, input_dim, variance=1., active_dims=None, name='basis func kernel'):
+    def __init__(self, input_dim, variance=1., active_dims=None, ARD=False, name='basis func kernel'):
         """
         Abstract superclass for kernels with explicit basis functions for use in GPy.
         This class does NOT automatically add an offset to the design matrix phi!
         """
         super(BasisFuncKernel, self).__init__(input_dim, active_dims, name)
+        self.ARD = ARD
+        if self.ARD:
+            phi_test = self._phi(np.random.normal(0, 1, (1, self.input_dim)))
+            variance = variance * np.ones(phi_test.shape[1])
+        else:
+            variance = np.array(variance)
         self.variance = Param('variance', variance, Logexp())
         self.link_parameter(self.variance)
 
+    def parameters_changed(self):
+        self.alpha = np.sqrt(self.variance)
+        #self.beta = 1./self.variance
+
+    @Cache_this(limit=3, ignore_args=())
     def phi(self, X):
-        raise NotImplementedError('Overwrite this phi function, which maps the input X into the higher dimensional space and forms the design matrix Phi')
+        return self._phi(X)
+
+    def _phi(self, X):
+        raise NotImplementedError('Overwrite this _phi function, which maps the input X into the higher dimensional space and returns the design matrix Phi')
 
     def K(self, X, X2=None):
-        return self.variance * self._K(X, X2)
+        return self._K(X, X2)
 
     def Kdiag(self, X, X2=None):
-        return self.variance * np.diag(self._K(X, X2))
+        return np.diag(self._K(X, X2))
 
     def update_gradients_full(self, dL_dK, X, X2=None):
-        self.variance.gradient = np.einsum('ij,ij', dL_dK, self._K(X, X2))
+        if self.ARD:
+            phi1 = self.phi(X)
+            if X2 is None or X is X2:
+                self.variance.gradient = np.einsum('ij,iq,jq->q', dL_dK, phi1, phi1)
+            else:
+                phi2 = self.phi(X2)
+                self.variance.gradient = np.einsum('ij,iq,jq->q', dL_dK, phi1, phi2)
+        else:
+            self.variance.gradient = np.einsum('ij,ij', dL_dK, self._K(X, X2))
 
     def update_gradients_diag(self, dL_dKdiag, X):
-        self.variance.gradient = np.einsum('i,i', dL_dKdiag, self._K(X))
+        if self.ARD:
+            phi1 = self.phi(X)
+            self.variance.gradient = np.einsum('i,iq,iq->q', dL_dKdiag, phi1, phi1)
+        else:
+            self.variance.gradient = np.einsum('i,i', dL_dKdiag, self.Kdiag(X))
 
     def concatenate_offset(self, X):
         return np.c_[np.ones((X.shape[0], 1)), X]
@@ -52,19 +78,19 @@ class BasisFuncKernel(Kern):
             posterior = self._highest_parent_.posterior
         except NameError:
             raise RuntimeError("This kernel is not part of a model and cannot be used for posterior inference")
-        phi = self.phi(X)
-        return self.variance * phi.T.dot(posterior.woodbury_vector), self.variance * (1 - self.variance * phi.T.dot(posterior.woodbury_inv.dot(phi)))
+        phi_alpha = self.phi(X) * self.variance
+        return (phi_alpha).T.dot(posterior.woodbury_vector), (np.eye(phi_alpha.shape[1])*self.variance - mdot(phi_alpha.T, posterior.woodbury_inv, phi_alpha))
 
     @Cache_this(limit=3, ignore_args=())
     def _K(self, X, X2):
         if X2 is None or X is X2:
-            phi = self.phi(X)
+            phi = self.phi(X) * self.alpha
             if phi.ndim != 2:
                 phi = phi[:, None]
             return tdot(phi)
         else:
-            phi1 = self.phi(X)
-            phi2 = self.phi(X2)
+            phi1 = self.phi(X) * self.alpha
+            phi2 = self.phi(X2) * self.alpha
             if phi1.ndim != 2:
                 phi1 = phi1[:, None]
                 phi2 = phi2[:, None]
@@ -72,30 +98,43 @@
 
 class LinearSlopeBasisFuncKernel(BasisFuncKernel):
-    def __init__(self, input_dim, start, stop, variance=1., active_dims=None, name='linear_segment'):
-        super(LinearSlopeBasisFuncKernel, self).__init__(input_dim, variance, active_dims, name)
+    def __init__(self, input_dim, start, stop, variance=1., active_dims=None, ARD=False, name='linear_segment'):
+        """
+        A linear segment transformation. The segments start at start, \
+        are then linear to stop and constant again. The segments are
+        normalized, so that they have exactly as much mass above
+        as below the origin.
+
+        Start and stop can be tuples or lists of starts and stops.
+        Behaviour of start stop is as np.where(X<start) would do.
+
+        """
         self.start = np.array(start)
         self.stop = np.array(stop)
+        super(LinearSlopeBasisFuncKernel, self).__init__(input_dim, variance, active_dims, ARD, name)
 
     @Cache_this(limit=3, ignore_args=())
-    def phi(self, X):
+    def _phi(self, X):
         phi = np.where(X < self.start, self.start, X)
         phi = np.where(phi > self.stop, self.stop, phi)
         return ((phi-(self.stop+self.start)/2.))#/(.5*(self.stop-self.start)))-1.
         return self.concatenate_offset(phi) # ((phi-self.start)/(self.stop-self.start))-.5
 
 
 class ChangePointBasisFuncKernel(BasisFuncKernel):
-    def __init__(self, input_dim, changepoint, variance=1., active_dims=None, name='changepoint'):
-        super(ChangePointBasisFuncKernel, self).__init__(input_dim, variance, active_dims, name)
+    def __init__(self, input_dim, changepoint, variance=1., active_dims=None, ARD=False, name='changepoint'):
         self.changepoint = changepoint
+        super(ChangePointBasisFuncKernel, self).__init__(input_dim, variance, active_dims, ARD, name)
 
     @Cache_this(limit=3, ignore_args=())
-    def phi(self, X):
+    def _phi(self, X):
         return self.concatenate_offset(np.where((X < self.changepoint), -1, 1))
 
 
 class DomainKernel(LinearSlopeBasisFuncKernel):
+    def __init__(self, input_dim, start, stop, variance=1., active_dims=None, ARD=False, name='constant_domain'):
+        super(DomainKernel, self).__init__(input_dim, start, stop, variance, active_dims, ARD, name)
+
     @Cache_this(limit=3, ignore_args=())
-    def phi(self, X):
+    def _phi(self, X):
         phi = np.where((X>self.start)*(X<self.stop), 1, 0)
         return self.concatenate_offset(phi)
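Reviewer note on the ARD gradients (not part of the patch): with ARD enabled the kernel is K = Phi diag(v) Phi^T for Phi = phi(X), so dK_ij/dv_q = Phi_iq Phi_jq and hence dL/dv_q = sum_ij dL_dK_ij Phi_iq Phi_jq, which is exactly the einsum('ij,iq,jq->q', ...) introduced in update_gradients_full. A minimal numpy-only finite-difference check of that identity; every name below is an illustrative stand-in, not GPy API:

import numpy as np

rng = np.random.RandomState(0)
N, Q = 5, 3
Phi = rng.randn(N, Q)        # stand-in for the design matrix phi(X)
v = rng.rand(Q) + 0.5        # stand-in for the ARD variance vector
dL_dK = rng.randn(N, N)      # arbitrary upstream gradient

# Analytic gradient, as in update_gradients_full above.
grad = np.einsum('ij,iq,jq->q', dL_dK, Phi, Phi)

# Central finite differences of L(v) = sum(dL_dK * K(v)),
# with K(v) = Phi diag(v) Phi^T; (Phi * v) scales the columns of Phi.
eps = 1e-6
fd = np.empty(Q)
for q in range(Q):
    vp, vm = v.copy(), v.copy()
    vp[q] += eps
    vm[q] -= eps
    Kp = (Phi * vp).dot(Phi.T)
    Km = (Phi * vm).dot(Phi.T)
    fd[q] = np.sum(dL_dK * (Kp - Km)) / (2 * eps)

assert np.allclose(grad, fd)

The same factorization explains the alpha = sqrt(variance) cached in parameters_changed: tdot(phi * alpha) = Phi diag(v) Phi^T, so _K can absorb the variance into the design matrix instead of scaling the kernel afterwards.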
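A usage sketch, assuming this branch is installed and the basis function kernels remain exported under GPy.kern as on devel; the data, change point, and the posterior_inf call at the end are illustrative:

import numpy as np
import GPy

X = np.linspace(0, 10, 200)[:, None]
Y = np.where(X > 5., 1., -1.) + 0.1 * np.random.randn(200, 1)

# ARD=True now gives one variance per basis function (here: the offset
# column and the step column) instead of a single shared variance.
k = GPy.kern.ChangePointBasisFuncKernel(1, changepoint=5., variance=1., ARD=True)
m = GPy.models.GPRegression(X, Y, k)
m.optimize()
print(m.kern.variance)      # two variances, one per column of phi(X)

# Mean and covariance of the basis-function weights from the reworked
# posterior_inf (assumes the kernel lives inside a fitted model, which
# provides the posterior it reads from).
mu_w, cov_w = m.kern.posterior_inf()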