mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-05-24 14:15:14 +02:00
fix: basis functions gradients pushed up a level in hierarchy
This commit is contained in:
parent
4f3047e035
commit
09bde76d5d
1 changed file with 36 additions and 22 deletions
|
|
@ -8,14 +8,15 @@ from paramz.caching import Cache_this
|
||||||
from ...util.linalg import tdot, mdot
|
from ...util.linalg import tdot, mdot
|
||||||
|
|
||||||
class BasisFuncKernel(Kern):
|
class BasisFuncKernel(Kern):
|
||||||
def __init__(self, input_dim, variance=1., active_dims=None, ARD=False, name='basis func kernel'):
|
def __init__(self, input_dim, variance=1., active_dims=None, ARD=False, name='basis func kernel', _warn_input_dim=True):
|
||||||
"""
|
"""
|
||||||
Abstract superclass for kernels with explicit basis functions for use in GPy.
|
Abstract superclass for kernels with explicit basis functions for use in GPy.
|
||||||
|
|
||||||
This class does NOT automatically add an offset to the design matrix phi!
|
This class does NOT automatically add an offset to the design matrix phi!
|
||||||
"""
|
"""
|
||||||
super(BasisFuncKernel, self).__init__(input_dim, active_dims, name)
|
super(BasisFuncKernel, self).__init__(input_dim, active_dims, name)
|
||||||
assert self.input_dim==1, "Basis Function Kernel only implemented for one dimension. Use one kernel per dimension (and add them together) for more dimensions"
|
if _warn_input_dim:
|
||||||
|
assert self.input_dim==1, "Basis Function Kernel only implemented for one dimension. Use one kernel per dimension (and add them together) for more dimensions"
|
||||||
self.ARD = ARD
|
self.ARD = ARD
|
||||||
if self.ARD:
|
if self.ARD:
|
||||||
phi_test = self._phi(np.random.normal(0, 1, (1, self.input_dim)))
|
phi_test = self._phi(np.random.normal(0, 1, (1, self.input_dim)))
|
||||||
|
|
@ -42,6 +43,27 @@ class BasisFuncKernel(Kern):
|
||||||
def Kdiag(self, X, X2=None):
|
def Kdiag(self, X, X2=None):
|
||||||
return np.diag(self._K(X, X2))
|
return np.diag(self._K(X, X2))
|
||||||
|
|
||||||
|
def d_K_d_theta_through_K(self, phi1, dL_dK, dphi1_d_theta, phi2=None, dphi2_d_theta=None, ARD=True):
|
||||||
|
"""
|
||||||
|
Helper to push inner parameter gradients through to K.
|
||||||
|
|
||||||
|
Give phi(X) and the gradient of phi(X) wrt the parameter theta.
|
||||||
|
|
||||||
|
Remember to set the gradient of the respective theta yourself!
|
||||||
|
"""
|
||||||
|
if phi2 is None or phi2 is phi1:
|
||||||
|
if ARD:
|
||||||
|
gradient = self.variance * 2 * np.einsum('ij,iq,jq->q', dL_dK, phi1, dphi1_d_theta)
|
||||||
|
else:
|
||||||
|
gradient = np.sum(self.variance * 2 * (dL_dK * phi1.dot(dphi1_d_theta.T)).sum())
|
||||||
|
else:
|
||||||
|
if ARD:
|
||||||
|
gradient = (self.variance * np.einsum('ij,iq,jq->q', dL_dK, phi1, dphi2_d_theta) + np.einsum('ij,iq,jq->q', dL_dK, phi2, dphi1_d_theta))
|
||||||
|
else:
|
||||||
|
gradient = np.sum(self.variance * (dL_dK * phi1.dot(dphi2_d_theta.T)).sum() + (dL_dK * phi2.dot(dphi1_d_theta.T)).sum())
|
||||||
|
return np.where(np.isnan(gradient), 0, gradient)
|
||||||
|
|
||||||
|
|
||||||
def update_gradients_full(self, dL_dK, X, X2=None):
|
def update_gradients_full(self, dL_dK, X, X2=None):
|
||||||
if self.ARD:
|
if self.ARD:
|
||||||
phi1 = self.phi(X)
|
phi1 = self.phi(X)
|
||||||
|
|
@ -162,7 +184,7 @@ class ChangePointBasisFuncKernel(BasisFuncKernel):
|
||||||
|
|
||||||
class DomainKernel(LinearSlopeBasisFuncKernel):
|
class DomainKernel(LinearSlopeBasisFuncKernel):
|
||||||
"""
|
"""
|
||||||
Create a constant plateou of correlation between start and stop and zero
|
Create a constant plateau of correlation between start and stop and zero
|
||||||
elsewhere. This is a constant shift of the outputs along the yaxis
|
elsewhere. This is a constant shift of the outputs along the yaxis
|
||||||
in the range from start to stop.
|
in the range from start to stop.
|
||||||
"""
|
"""
|
||||||
|
|
@ -202,25 +224,17 @@ class LogisticBasisFuncKernel(BasisFuncKernel):
|
||||||
|
|
||||||
def update_gradients_full(self, dL_dK, X, X2=None):
|
def update_gradients_full(self, dL_dK, X, X2=None):
|
||||||
super(LogisticBasisFuncKernel, self).update_gradients_full(dL_dK, X, X2)
|
super(LogisticBasisFuncKernel, self).update_gradients_full(dL_dK, X, X2)
|
||||||
if X2 is None or X is X2:
|
|
||||||
phi1 = self.phi(X)
|
phi1 = self.phi(X)
|
||||||
if phi1.ndim != 2:
|
if phi1.ndim != 2:
|
||||||
phi1 = phi1[:, None]
|
phi1 = phi1[:, None]
|
||||||
dphi1_dl = (phi1**2) * (np.exp(-((X-self.centers)*self.slope)) * (X-self.centers))
|
dphi1_dl = (phi1**2) * (np.exp(-((X-self.centers)*self.slope)) * (X-self.centers))
|
||||||
if self.ARD_slope:
|
|
||||||
self.slope.gradient = self.variance * 2 * np.einsum('ij,iq,jq->q', dL_dK, phi1, dphi1_dl)
|
phi2 = dphi2_dl = None
|
||||||
else:
|
if not(X2 is None or X is X2):
|
||||||
self.slope.gradient = np.sum(self.variance * 2 * (dL_dK * phi1.dot(dphi1_dl.T)).sum())
|
|
||||||
else:
|
|
||||||
phi1 = self.phi(X)
|
|
||||||
phi2 = self.phi(X2)
|
phi2 = self.phi(X2)
|
||||||
if phi1.ndim != 2:
|
if phi2.ndim != 2:
|
||||||
phi1 = phi1[:, None]
|
|
||||||
phi2 = phi2[:, None]
|
phi2 = phi2[:, None]
|
||||||
dphi1_dl = (phi1**2) * (np.exp(-((X-self.centers)*self.slope)) * (X-self.centers))
|
|
||||||
dphi2_dl = (phi2**2) * (np.exp(-((X2-self.centers)*self.slope)) * (X2-self.centers))
|
dphi2_dl = (phi2**2) * (np.exp(-((X2-self.centers)*self.slope)) * (X2-self.centers))
|
||||||
if self.ARD_slope:
|
|
||||||
self.slope.gradient = (self.variance * np.einsum('ij,iq,jq->q', dL_dK, phi1, dphi2_dl) + np.einsum('ij,iq,jq->q', dL_dK, phi2, dphi1_dl))
|
self.slope.gradient = self.d_K_d_theta_through_K(phi1, dL_dK, dphi1_dl, phi2, dphi2_dl, ARD=self.ARD_slope)
|
||||||
else:
|
|
||||||
self.slope.gradient = np.sum(self.variance * (dL_dK * phi1.dot(dphi2_dl.T)).sum() + (dL_dK * phi2.dot(dphi1_dl.T)).sum())
|
|
||||||
self.slope.gradient = np.where(np.isnan(self.slope.gradient), 0, self.slope.gradient)
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue