mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-05-30 14:35:15 +02:00
Simplified the way how multioutput kernel updates and uses gradients. As a consequense, DiffKern cannot be used as a standalone kernel anymore
This commit is contained in:
parent
03c2838bb9
commit
29b9435a8e
6 changed files with 179 additions and 50 deletions
|
|
@ -1,21 +1,21 @@
|
||||||
# Copyright (c) 2018, GPy authors (see AUTHORS.txt).
|
# Copyright (c) 2018, GPy authors (see AUTHORS.txt).
|
||||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||||
from .kern import CombinationKernel
|
from .kern import Kern
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from paramz.caching import Cache_this
|
from paramz.caching import Cache_this
|
||||||
|
|
||||||
class DiffKern(CombinationKernel):
|
class DiffKern(Kern):
|
||||||
"""
|
"""
|
||||||
Diff kernel is a thin wrapper for using partial derivatives of kernels as kernels. Eg. in combination with
|
Diff kernel is a thin wrapper for using partial derivatives of kernels as kernels. Eg. in combination with
|
||||||
Multioutput kernel this allows the user to train GPs with observations of latent function and latent
|
Multioutput kernel this allows the user to train GPs with observations of latent function and latent
|
||||||
function derivatives
|
function derivatives. NOTE: DiffKern only works when used with Multioutput kernel. Do not use the kernel as standalone
|
||||||
|
|
||||||
The parameters the kernel needs are:
|
The parameters the kernel needs are:
|
||||||
-'base_kern': a member of Kernel class that is used for observations
|
-'base_kern': a member of Kernel class that is used for observations
|
||||||
-'dimension': integer that indigates in which dimensions the partial derivative observations are
|
-'dimension': integer that indigates in which dimensions the partial derivative observations are
|
||||||
"""
|
"""
|
||||||
def __init__(self, base_kern, dimension):
|
def __init__(self, base_kern, dimension):
|
||||||
super(DiffKern, self).__init__([base_kern], 'DiffKern')
|
super(DiffKern, self).__init__(base_kern.active_dims.size, base_kern.active_dims, name='DiffKern')
|
||||||
self.base_kern = base_kern
|
self.base_kern = base_kern
|
||||||
self.dimension = dimension
|
self.dimension = dimension
|
||||||
|
|
||||||
|
|
@ -45,11 +45,13 @@ class DiffKern(CombinationKernel):
|
||||||
def reset_gradients(self):
|
def reset_gradients(self):
|
||||||
self.base_kern.reset_gradients()
|
self.base_kern.reset_gradients()
|
||||||
|
|
||||||
def get_gradient(self):
|
@property
|
||||||
|
def gradient(self):
|
||||||
return self.base_kern.gradient
|
return self.base_kern.gradient
|
||||||
|
|
||||||
def append_gradient(self, gradient):
|
@gradient.setter
|
||||||
self.base_kern.gradient += gradient
|
def gradient(self, gradient):
|
||||||
|
self.base_kern.gradient = gradient
|
||||||
|
|
||||||
def update_gradients_full(self, dL_dK, X, X2=None, dimX2=None):
|
def update_gradients_full(self, dL_dK, X, X2=None, dimX2=None):
|
||||||
if dimX2 is None:
|
if dimX2 is None:
|
||||||
|
|
@ -65,11 +67,19 @@ class DiffKern(CombinationKernel):
|
||||||
if X2 is None:
|
if X2 is None:
|
||||||
X2 = X
|
X2 = X
|
||||||
gradients = self.base_kern.dgradients_dX(X,X2, self.dimension)
|
gradients = self.base_kern.dgradients_dX(X,X2, self.dimension)
|
||||||
self.base_kern.append_gradients_direct(*[self._convert_gradients(dL_dK, gradient) for gradient in gradients])
|
self.base_kern.update_gradients_direct(*[self._convert_gradients(dL_dK, gradient) for gradient in gradients])
|
||||||
|
|
||||||
def update_gradients_dK_dX2(self, dL_dK, X, X2=None):
|
def update_gradients_dK_dX2(self, dL_dK, X, X2=None):
|
||||||
gradients = self.base_kern.dgradients_dX2(X,X2, self.dimension)
|
gradients = self.base_kern.dgradients_dX2(X,X2, self.dimension)
|
||||||
self.base_kern.append_gradients_direct(*[self._convert_gradients(dL_dK, gradient) for gradient in gradients])
|
self.base_kern.update_gradients_direct(*[self._convert_gradients(dL_dK, gradient) for gradient in gradients])
|
||||||
|
|
||||||
|
def gradients_X(self, dL_dK, X, X2):
|
||||||
|
tmp = self.base_kern.gradients_XX(dL_dK, X, X2)[:,:,:, self.dimension]
|
||||||
|
return np.sum(tmp, axis=1)
|
||||||
|
|
||||||
|
def gradients_X2(self, dL_dK, X, X2):
|
||||||
|
tmp = self.base_kern.gradients_XX(dL_dK, X, X2)[:, :, self.dimension, :]
|
||||||
|
return np.sum(tmp, axis=1)
|
||||||
|
|
||||||
def _convert_gradients(self, l,g, f = lambda x:x):
|
def _convert_gradients(self, l,g, f = lambda x:x):
|
||||||
if type(g) is np.ndarray:
|
if type(g) is np.ndarray:
|
||||||
|
|
|
||||||
|
|
@ -116,6 +116,12 @@ class Kern(Parameterized):
|
||||||
except:
|
except:
|
||||||
return X[:, self._all_dims_active]
|
return X[:, self._all_dims_active]
|
||||||
|
|
||||||
|
def _project_dim(self, dim):
|
||||||
|
try:
|
||||||
|
return np.where(self._all_dims_active == dim)[0][0]
|
||||||
|
except:
|
||||||
|
return None
|
||||||
|
|
||||||
def K(self, X, X2):
|
def K(self, X, X2):
|
||||||
"""
|
"""
|
||||||
Compute the kernel function.
|
Compute the kernel function.
|
||||||
|
|
@ -201,12 +207,6 @@ class Kern(Parameterized):
|
||||||
def reset_gradients(self):
|
def reset_gradients(self):
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
def get_gradient(self):
|
|
||||||
return self.gradient
|
|
||||||
|
|
||||||
def append_gradient(self,gradient):
|
|
||||||
self.gradient += gradient
|
|
||||||
|
|
||||||
def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
|
def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
|
||||||
"""
|
"""
|
||||||
Set the gradients of all parameters when doing inference with
|
Set the gradients of all parameters when doing inference with
|
||||||
|
|
|
||||||
|
|
@ -19,16 +19,26 @@ def put_clean(dct, name, func):
|
||||||
class KernCallsViaSlicerMeta(ParametersChangedMeta):
|
class KernCallsViaSlicerMeta(ParametersChangedMeta):
|
||||||
def __new__(cls, name, bases, dct):
|
def __new__(cls, name, bases, dct):
|
||||||
put_clean(dct, 'K', _slice_K)
|
put_clean(dct, 'K', _slice_K)
|
||||||
|
put_clean(dct, 'dK_dX', _slice_dK_dX)
|
||||||
|
put_clean(dct, 'dK_dX2', _slice_dK_dX)
|
||||||
|
put_clean(dct, 'dK2_dXdX2', _slice_dK2_dXdX2)
|
||||||
put_clean(dct, 'Kdiag', _slice_Kdiag)
|
put_clean(dct, 'Kdiag', _slice_Kdiag)
|
||||||
put_clean(dct, 'phi', _slice_Kdiag)
|
put_clean(dct, 'phi', _slice_Kdiag)
|
||||||
put_clean(dct, 'update_gradients_full', _slice_update_gradients_full)
|
put_clean(dct, 'update_gradients_full', _slice_update_gradients_full)
|
||||||
put_clean(dct, 'update_gradients_diag', _slice_update_gradients_diag)
|
put_clean(dct, 'update_gradients_diag', _slice_update_gradients_diag)
|
||||||
|
put_clean(dct, 'update_gradients_dK_dX', _slice_update_gradients_full)
|
||||||
|
put_clean(dct, 'update_gradients_dK_dX2', _slice_update_gradients_full)
|
||||||
put_clean(dct, 'gradients_X', _slice_gradients_X)
|
put_clean(dct, 'gradients_X', _slice_gradients_X)
|
||||||
|
put_clean(dct, 'gradients_X2', _slice_gradients_X)
|
||||||
put_clean(dct, 'gradients_X_X2', _slice_gradients_X)
|
put_clean(dct, 'gradients_X_X2', _slice_gradients_X)
|
||||||
put_clean(dct, 'gradients_XX', _slice_gradients_XX)
|
put_clean(dct, 'gradients_XX', _slice_gradients_XX)
|
||||||
put_clean(dct, 'gradients_XX_diag', _slice_gradients_XX_diag)
|
put_clean(dct, 'gradients_XX_diag', _slice_gradients_XX_diag)
|
||||||
put_clean(dct, 'gradients_X_diag', _slice_gradients_X_diag)
|
put_clean(dct, 'gradients_X_diag', _slice_gradients_X_diag)
|
||||||
|
|
||||||
|
put_clean(dct, 'dgradients_dX',_slice_partial_gradients_list_X)
|
||||||
|
put_clean(dct, 'dgradients_dX2',_slice_partial_gradients_list_X)
|
||||||
|
put_clean(dct, 'dgradients2_dXdX2',_slice_partial_gradients_list_XX)
|
||||||
|
|
||||||
put_clean(dct, 'psi0', _slice_psi)
|
put_clean(dct, 'psi0', _slice_psi)
|
||||||
put_clean(dct, 'psi1', _slice_psi)
|
put_clean(dct, 'psi1', _slice_psi)
|
||||||
put_clean(dct, 'psi2', _slice_psi)
|
put_clean(dct, 'psi2', _slice_psi)
|
||||||
|
|
@ -79,6 +89,20 @@ class _Slice_wrap(object):
|
||||||
return ret
|
return ret
|
||||||
return return_val
|
return return_val
|
||||||
|
|
||||||
|
def handle_return_list(self, return_val):
|
||||||
|
if self.ret:
|
||||||
|
ret = [np.zeros(self.shape) for _ in range(len(return_val))]
|
||||||
|
if len(self.shape) == 3:
|
||||||
|
for i in range(len(return_val)):
|
||||||
|
ret[i][self.k._all_dims_active, :, :] = return_val[i]
|
||||||
|
#[ret[i].__setitem__((:, :, self.k._all_dims_active), return_val[i]) for i in range(len(return_val))]
|
||||||
|
elif len(self.shape) == 4:
|
||||||
|
for i in range(len(return_val)):
|
||||||
|
ret[i][np.ix_(self.k._all_dims_active, self.k._all_dims_active)] = return_val[i]
|
||||||
|
#[ret[i].__setitem__((:, :, self.k._all_dims_active, self.k._all_dims_active), return_val[i]) for i in range(len(return_val))]
|
||||||
|
return ret
|
||||||
|
return return_val
|
||||||
|
|
||||||
def _slice_K(f):
|
def _slice_K(f):
|
||||||
@wraps(f)
|
@wraps(f)
|
||||||
def wrap(self, X, X2 = None, *a, **kw):
|
def wrap(self, X, X2 = None, *a, **kw):
|
||||||
|
|
@ -97,15 +121,15 @@ def _slice_Kdiag(f):
|
||||||
|
|
||||||
def _slice_update_gradients_full(f):
|
def _slice_update_gradients_full(f):
|
||||||
@wraps(f)
|
@wraps(f)
|
||||||
def wrap(self, dL_dK, X, X2=None):
|
def wrap(self, dL_dK, X, X2=None, *a, **kw):
|
||||||
with _Slice_wrap(self, X, X2) as s:
|
with _Slice_wrap(self, X, X2) as s:
|
||||||
ret = f(self, dL_dK, s.X, s.X2)
|
ret = f(self, dL_dK, s.X, s.X2, *a, **kw)
|
||||||
return ret
|
return ret
|
||||||
return wrap
|
return wrap
|
||||||
|
|
||||||
def _slice_update_gradients_diag(f):
|
def _slice_update_gradients_diag(f):
|
||||||
@wraps(f)
|
@wraps(f)
|
||||||
def wrap(self, dL_dKdiag, X):
|
def wrap(self, dL_dKdiag, X, *a, **kw):
|
||||||
with _Slice_wrap(self, X, None) as s:
|
with _Slice_wrap(self, X, None) as s:
|
||||||
ret = f(self, dL_dKdiag, s.X)
|
ret = f(self, dL_dKdiag, s.X)
|
||||||
return ret
|
return ret
|
||||||
|
|
@ -119,6 +143,99 @@ def _slice_gradients_X(f):
|
||||||
return ret
|
return ret
|
||||||
return wrap
|
return wrap
|
||||||
|
|
||||||
|
def _slice_dK_dX(f):
|
||||||
|
@wraps(f)
|
||||||
|
def wrap(self, X, X2, dim, *a, **kw):
|
||||||
|
with _Slice_wrap(self, X, X2) as s:
|
||||||
|
d = s.k._project_dim(dim)
|
||||||
|
if d is None:
|
||||||
|
ret = np.zeros((X.shape[0], X2.shape[0]))
|
||||||
|
else:
|
||||||
|
ret = f(self, s.X, s.X2, d, *a, **kw)
|
||||||
|
return ret
|
||||||
|
return wrap
|
||||||
|
|
||||||
|
def _slice_dK2_dXdX2(f):
|
||||||
|
@wraps(f)
|
||||||
|
def wrap(self, X, X2, dimX, dimX2, *a, **kw):
|
||||||
|
with _Slice_wrap(self, X, X2) as s:
|
||||||
|
d = s.k._project_dim(dimX)
|
||||||
|
d2 = s.k._project_dim(dimX2)
|
||||||
|
if (d is None) or (d2 is None):
|
||||||
|
ret = np.zeros((X.shape[0], X2.shape[0]))
|
||||||
|
else:
|
||||||
|
ret = f(self, s.X, s.X2, d, d2, *a, **kw)
|
||||||
|
return ret
|
||||||
|
return wrap
|
||||||
|
|
||||||
|
def _slice_partial_gradients_X(f):
|
||||||
|
@wraps(f)
|
||||||
|
def wrap(self, X, X2, dim):
|
||||||
|
if X2 is None:
|
||||||
|
N, M = X.shape[0], X.shape[0]
|
||||||
|
else:
|
||||||
|
N, M = X.shape[0], X2.shape[0]
|
||||||
|
Q1 = X.shape[1]
|
||||||
|
with _Slice_wrap(self, X, X2, ret_shape=(N, M, Q1)) as s:
|
||||||
|
ret = s.handle_return_array(f(self, s.X, s.X2, dim))
|
||||||
|
return ret
|
||||||
|
return wrap
|
||||||
|
|
||||||
|
def _slice_partial_gradients_list_X(f):
|
||||||
|
@wraps(f)
|
||||||
|
def wrap(self, X, X2, dim):
|
||||||
|
if X2 is None:
|
||||||
|
N, M = X.shape[0], X.shape[0]
|
||||||
|
else:
|
||||||
|
N, M = X.shape[0], X2.shape[0]
|
||||||
|
with _Slice_wrap(self, X, X2, ret_shape=(N, M)) as s:
|
||||||
|
d = s.k._project_dim(dim)
|
||||||
|
if d is None:
|
||||||
|
ret = [np.zeros((N, M)) for i in range(s.k.size)]
|
||||||
|
else:
|
||||||
|
ret = f(self, s.X, s.X2, d)
|
||||||
|
return ret
|
||||||
|
return wrap
|
||||||
|
|
||||||
|
def _slice_partial_gradients_XX(f):
|
||||||
|
@wraps(f)
|
||||||
|
def wrap(self, X, X2, dim, dimX2):
|
||||||
|
if X2 is None:
|
||||||
|
N, M = X.shape[0], X.shape[0]
|
||||||
|
Q1 = X.shape[1]
|
||||||
|
else:
|
||||||
|
N, M = X.shape[0], X2.shape[0]
|
||||||
|
Q1, Q2 = X.shape[1], X2.shape[1]
|
||||||
|
with _Slice_wrap(self, X, X2, ret_shape=(N, M, Q1, Q2)) as s:
|
||||||
|
ret = s.handle_return_array(f(self, s.X, s.X2, dim, dimX2))
|
||||||
|
return ret
|
||||||
|
return wrap
|
||||||
|
|
||||||
|
def _slice_partial_gradients_list_XX(f):
|
||||||
|
@wraps(f)
|
||||||
|
def wrap(self, X, X2, dimX, dimX2):
|
||||||
|
if X2 is None:
|
||||||
|
N, M = X.shape[0], X.shape[0]
|
||||||
|
else:
|
||||||
|
N, M = X.shape[0], X2.shape[0]
|
||||||
|
with _Slice_wrap(self, X, X2, ret_shape=(N, M)) as s:
|
||||||
|
d = s.k._project_dim(dimX)
|
||||||
|
d2 = s.k._project_dim(dimX2)
|
||||||
|
if (d is None) or (d2 is None):
|
||||||
|
ret = [np.zeros((N, M)) for i in range(s.k.size)]
|
||||||
|
else:
|
||||||
|
ret = f(self, s.X, s.X2, d, d2)
|
||||||
|
return ret
|
||||||
|
return wrap
|
||||||
|
|
||||||
|
def _slice_gradient_derivatives_X(f):
|
||||||
|
@wraps(f)
|
||||||
|
def wrap(self, dL_dK, X, X2=None):
|
||||||
|
with _Slice_wrap(self, X, X2) as s:
|
||||||
|
ret = s.handle_return_array(f(self, dL_dK, s.X, s.X2))
|
||||||
|
return ret
|
||||||
|
return wrap
|
||||||
|
|
||||||
def _slice_gradients_X_diag(f):
|
def _slice_gradients_X_diag(f):
|
||||||
@wraps(f)
|
@wraps(f)
|
||||||
def wrap(self, dL_dKdiag, X):
|
def wrap(self, dL_dKdiag, X):
|
||||||
|
|
|
||||||
|
|
@ -4,29 +4,32 @@
|
||||||
from .kern import Kern, CombinationKernel
|
from .kern import Kern, CombinationKernel
|
||||||
from .multioutput_kern import MultioutputKern, ZeroKern
|
from .multioutput_kern import MultioutputKern, ZeroKern
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
from functools import partial
|
||||||
|
|
||||||
class KernWrapper(Kern):
|
class KernWrapper(Kern):
|
||||||
def __init__(self, fk, fug, fg, base_kern):
|
def __init__(self, fk, fug, fg, base_kern):
|
||||||
self.fk = fk
|
self.fk = fk
|
||||||
self.fug = fug
|
self.fug = fug
|
||||||
self.fg = fg
|
self.fg = fg
|
||||||
self.base_kern
|
self.base_kern = base_kern
|
||||||
super(KernWrapper, self).__init__(1, None, name='KernWrapper',useGPU=False)
|
super(KernWrapper, self).__init__(base_kern.active_dims.size, base_kern.active_dims, name='KernWrapper',useGPU=False)
|
||||||
|
|
||||||
def K(self, X, X2=None):
|
def K(self, X, X2=None):
|
||||||
return self.fk(X,X2=X2)
|
return self.fk(X,X2=X2)
|
||||||
|
|
||||||
def update_gradients_full(self,dL_dK, X, X2=None):
|
def update_gradients_full(self,dL_dK, X, X2=None):
|
||||||
return self.fug(dK_dK, X, X2=X2)
|
return self.fug(dL_dK, X, X2=X2)
|
||||||
|
|
||||||
def gradients_X(self,dL_dK, X, X2=None):
|
def gradients_X(self,dL_dK, X, X2=None):
|
||||||
return self.fg(dL_dK, X, X2=X2)
|
return self.fg(dL_dK, X, X2=X2)
|
||||||
|
|
||||||
def get_gradient(self):
|
@property
|
||||||
|
def gradient(self):
|
||||||
return self.base_kern.gradient
|
return self.base_kern.gradient
|
||||||
|
|
||||||
def append_gradient(self, gradient):
|
@gradient.setter
|
||||||
self.base_kern.gradient += gradient
|
def gradient(self, gradient):
|
||||||
|
self.base_kern.gradient = gradient
|
||||||
|
|
||||||
class MultioutputDerivativeKern(MultioutputKern):
|
class MultioutputDerivativeKern(MultioutputKern):
|
||||||
"""
|
"""
|
||||||
|
|
@ -58,26 +61,23 @@ class MultioutputDerivativeKern(MultioutputKern):
|
||||||
unique=True
|
unique=True
|
||||||
for j in range(0,nl):
|
for j in range(0,nl):
|
||||||
if i==j or (kernels[i] is kernels[j]):
|
if i==j or (kernels[i] is kernels[j]):
|
||||||
covariance[i][j] = {'kern': kernels[i],'K':kernels[i].K, 'update_gradients_full': kernels[i].update_gradients_full, 'gradients_X': kernels[i].gradients_X}
|
kern = kernels[i]
|
||||||
if i>j:
|
if i>j:
|
||||||
unique=False
|
unique=False
|
||||||
elif cross_covariances.get((i,j)) is not None: #cross covariance is given
|
elif cross_covariances.get((i,j)) is not None: #cross covariance is given
|
||||||
covariance[i][j] = cross_covariances.get((i,j))
|
kern = cross_covariances.get((i,j))
|
||||||
elif kernels[i].name == 'diffKern' and kernels[i].base_kern == kernels[j]: # one is derivative of other
|
elif kernels[i].name == 'DiffKern' and kernels[i].base_kern == kernels[j]: # one is derivative of other
|
||||||
kern = KernWrapper(kernels[i].dK_dX_wrap,kernels[i].update_gradients_dK_dX,kernels[i].gradients_X, kernels[j])
|
kern = KernWrapper(kernels[i].dK_dX_wrap,kernels[i].update_gradients_dK_dX,kernels[i].gradients_X, kernels[j])
|
||||||
covariance[i][j] = {'kern':kern, 'K': kern.K, 'update_gradients_full': kern.update_gradients_full, 'gradients_X': kern.gradients_X}
|
|
||||||
unique=False
|
unique=False
|
||||||
elif kernels[j].name == 'diffKern' and kernels[j].base_kern == kernels[i]: # one is derivative of other
|
elif kernels[j].name == 'DiffKern' and kernels[j].base_kern == kernels[i]: # one is derivative of other
|
||||||
kern = KernWrapper(kernels[j].dK_dX2_wrap,kernels[j].update_gradients_dK_dX2,kernels[j].gradients_X2, kernels[i])
|
kern = KernWrapper(kernels[j].dK_dX2_wrap,kernels[j].update_gradients_dK_dX2,kernels[j].gradients_X2, kernels[i])
|
||||||
covariance[i][j] = {'kern':kern, 'K': kern.K, 'update_gradients_full': kern.update_gradients_full, 'gradients_X': kern.gradients_X}
|
elif kernels[i].name == 'DiffKern' and kernels[j].name == 'DiffKern' and kernels[i].base_kern == kernels[j].base_kern: #both are partial derivatives
|
||||||
elif kernels[i].name == 'diffKern' and kernels[j].name == 'diffKern' and kernels[i].base_kern == kernels[j].base_kern: #both are partial derivatives
|
|
||||||
kern = KernWrapper(partial(kernels[i].K, dimX2=kernels[j].dimension), partial(kernels[i].update_gradients_full, dimX2=kernels[j].dimension),None, kernels[i].base_kern)
|
kern = KernWrapper(partial(kernels[i].K, dimX2=kernels[j].dimension), partial(kernels[i].update_gradients_full, dimX2=kernels[j].dimension),None, kernels[i].base_kern)
|
||||||
covariance[i][j] = {'kern':kern, 'K': kern.K, 'update_gradients_full': kern.update_gradients_full, 'gradients_X': kern.gradients_X}
|
|
||||||
if i>j:
|
if i>j:
|
||||||
unique=False
|
unique=False
|
||||||
else:
|
else:
|
||||||
kern = ZeroKern()
|
kern = ZeroKern()
|
||||||
covariance[i][j] = {'kern': kern, 'K': kern.K, 'update_gradients_full': kern.update_gradients_full, 'gradients_X': kern.gradients_X}
|
covariance[i][j] = kern
|
||||||
if unique is True:
|
if unique is True:
|
||||||
linked.append(i)
|
linked.append(i)
|
||||||
self.covariance = covariance
|
self.covariance = covariance
|
||||||
|
|
|
||||||
|
|
@ -21,6 +21,13 @@ class ZeroKern(Kern):
|
||||||
|
|
||||||
def gradients_X(self,dL_dK, X, X2=None):
|
def gradients_X(self,dL_dK, X, X2=None):
|
||||||
return np.zeros((X.shape[0],X.shape[1]))
|
return np.zeros((X.shape[0],X.shape[1]))
|
||||||
|
@property
|
||||||
|
def gradient(self):
|
||||||
|
return np.empty((1,0))
|
||||||
|
|
||||||
|
@gradient.setter
|
||||||
|
def gradient(self, gradient):
|
||||||
|
pass
|
||||||
|
|
||||||
class MultioutputKern(CombinationKernel):
|
class MultioutputKern(CombinationKernel):
|
||||||
"""
|
"""
|
||||||
|
|
@ -65,14 +72,13 @@ class MultioutputKern(CombinationKernel):
|
||||||
unique=True
|
unique=True
|
||||||
for j in range(0,nl):
|
for j in range(0,nl):
|
||||||
if i==j or (kernels[i] is kernels[j]):
|
if i==j or (kernels[i] is kernels[j]):
|
||||||
covariance[i][j] = {'kern': kernels[i], 'K': kernels[i].K, 'update_gradients_full': kernels[i].update_gradients_full, 'gradients_X': kernels[i].gradients_X}
|
covariance[i][j] = kernels[i]
|
||||||
if i>j:
|
if i>j:
|
||||||
unique=False
|
unique=False
|
||||||
elif cross_covariances.get((i,j)) is not None: #cross covariance is given
|
elif cross_covariances.get((i,j)) is not None: #cross covariance is given
|
||||||
covariance[i][j] = cross_covariances.get((i,j))
|
covariance[i][j] = cross_covariances.get((i,j))
|
||||||
else: # zero covariance structure
|
else: # zero covariance structure
|
||||||
kern = ZeroKern()
|
covariance[i][j] = ZeroKern()
|
||||||
covariance[i][j] = {'kern': kern, 'K': kern.K, 'update_gradients_full': kern.update_gradients_full, 'gradients_X': kern.gradients_X}
|
|
||||||
if unique is True:
|
if unique is True:
|
||||||
linked.append(i)
|
linked.append(i)
|
||||||
self.covariance = covariance
|
self.covariance = covariance
|
||||||
|
|
@ -85,7 +91,7 @@ class MultioutputKern(CombinationKernel):
|
||||||
slices = index_to_slices(X[:,self.index_dim])
|
slices = index_to_slices(X[:,self.index_dim])
|
||||||
slices2 = index_to_slices(X2[:,self.index_dim])
|
slices2 = index_to_slices(X2[:,self.index_dim])
|
||||||
target = np.zeros((X.shape[0], X2.shape[0]))
|
target = np.zeros((X.shape[0], X2.shape[0]))
|
||||||
[[[[ target.__setitem__((slices[i][k],slices2[j][l]), self.covariance[i][j]['K'](X[slices[i][k],:],X2[slices2[j][l],:])) for k in range( len(slices[i]))] for l in range(len(slices2[j])) ] for i in range(len(slices))] for j in range(len(slices2))]
|
[[[[ target.__setitem__((slices[i][k],slices2[j][l]), self.covariance[i][j].K(X[slices[i][k],:],X2[slices2[j][l],:])) for k in range( len(slices[i]))] for l in range(len(slices2[j])) ] for i in range(len(slices))] for j in range(len(slices2))]
|
||||||
return target
|
return target
|
||||||
|
|
||||||
@Cache_this(limit=3, ignore_args=())
|
@Cache_this(limit=3, ignore_args=())
|
||||||
|
|
@ -96,15 +102,15 @@ class MultioutputKern(CombinationKernel):
|
||||||
[[np.copyto(target[s], kern.Kdiag(X[s])) for s in slices_i] for kern, slices_i in zip(kerns, slices)]
|
[[np.copyto(target[s], kern.Kdiag(X[s])) for s in slices_i] for kern, slices_i in zip(kerns, slices)]
|
||||||
return target
|
return target
|
||||||
|
|
||||||
def _update_gradients_full_wrapper(self, cov_struct, dL_dK, X, X2):
|
def _update_gradients_full_wrapper(self, kern, dL_dK, X, X2):
|
||||||
gradient = cov_struct['kern'].get_gradient().copy()
|
gradient = kern.gradient.copy()
|
||||||
cov_struct['update_gradients_full'](dL_dK, X, X2)
|
kern.update_gradients_full(dL_dK, X, X2)
|
||||||
cov_struct['kern'].append_gradient(gradient)
|
kern.gradient += gradient
|
||||||
|
|
||||||
def _update_gradients_diag_wrapper(self, kern, dL_dKdiag, X):
|
def _update_gradients_diag_wrapper(self, kern, dL_dKdiag, X):
|
||||||
gradient = kern.get_gradient().copy()
|
gradient = kern.gradient.copy()
|
||||||
kern.update_gradients_diag(dL_dKdiag, X)
|
kern.update_gradients_diag(dL_dKdiag, X)
|
||||||
kern.append_gradient(gradient)
|
kern.gradient += gradient
|
||||||
|
|
||||||
def reset_gradients(self):
|
def reset_gradients(self):
|
||||||
for kern in self.kern: kern.reset_gradients()
|
for kern in self.kern: kern.reset_gradients()
|
||||||
|
|
@ -121,14 +127,14 @@ class MultioutputKern(CombinationKernel):
|
||||||
def update_gradients_diag(self, dL_dKdiag, X):
|
def update_gradients_diag(self, dL_dKdiag, X):
|
||||||
self.reset_gradients()
|
self.reset_gradients()
|
||||||
slices = index_to_slices(X[:,self.index_dim])
|
slices = index_to_slices(X[:,self.index_dim])
|
||||||
[[ self._update_gradients_diag_wrapper(self.covariance[i][i]['kern'], dL_dKdiag[slices[i][k]], X[slices[i][k],:]) for k in range(len(slices[i]))] for i in range(len(slices))]
|
[[ self._update_gradients_diag_wrapper(self.covariance[i][i], dL_dKdiag[slices[i][k]], X[slices[i][k],:]) for k in range(len(slices[i]))] for i in range(len(slices))]
|
||||||
|
|
||||||
def gradients_X(self,dL_dK, X, X2=None):
|
def gradients_X(self,dL_dK, X, X2=None):
|
||||||
slices = index_to_slices(X[:,self.index_dim])
|
slices = index_to_slices(X[:,self.index_dim])
|
||||||
target = np.zeros((X.shape[0], X.shape[1]) )
|
target = np.zeros((X.shape[0], X.shape[1]) )
|
||||||
if X2 is not None:
|
if X2 is not None:
|
||||||
slices2 = index_to_slices(X2[:,self.index_dim])
|
slices2 = index_to_slices(X2[:,self.index_dim])
|
||||||
[[[[ target.__setitem__((slices[i][k]), target[slices[i][k],:] + self.covariance[i][j]['gradients_X'](dL_dK[slices[i][k],slices2[j][l]], X[slices[i][k],:], X2[slices2[j][l],:]) ) for k in range(len(slices[i]))] for l in range(len(slices2[j]))] for i in range(len(slices))] for j in range(len(slices2))]
|
[[[[ target.__setitem__((slices[i][k]), target[slices[i][k],:] + self.covariance[i][j].gradients_X(dL_dK[slices[i][k],slices2[j][l]], X[slices[i][k],:], X2[slices2[j][l],:]) ) for k in range(len(slices[i]))] for l in range(len(slices2[j]))] for i in range(len(slices))] for j in range(len(slices2))]
|
||||||
else:
|
else:
|
||||||
[[[[ target.__setitem__((slices[i][k]), target[slices[i][k],:] + self.covariance[i][j]['gradients_X'](dL_dK[slices[i][k],slices[j][l]], X[slices[i][k],:], (None if (i==j and k==l) else X[slices[j][l],:] )) ) for k in range(len(slices[i]))] for l in range(len(slices[j]))] for i in range(len(slices))] for j in range(len(slices))]
|
[[[[ target.__setitem__((slices[i][k]), target[slices[i][k],:] + self.covariance[i][j].gradients_X(dL_dK[slices[i][k],slices[j][l]], X[slices[i][k],:], (None if (i==j and k==l) else X[slices[j][l],:] )) ) for k in range(len(slices[i]))] for l in range(len(slices[j]))] for i in range(len(slices))] for j in range(len(slices))]
|
||||||
return target
|
return target
|
||||||
|
|
@ -222,10 +222,6 @@ class Stationary(Kern):
|
||||||
self.variance.gradient = dL_dVar
|
self.variance.gradient = dL_dVar
|
||||||
self.lengthscale.gradient = dL_dLen
|
self.lengthscale.gradient = dL_dLen
|
||||||
|
|
||||||
def append_gradients_direct(self, dL_dVar, dL_dLen):
|
|
||||||
self.variance.gradient += dL_dVar
|
|
||||||
self.lengthscale.gradient += dL_dLen
|
|
||||||
|
|
||||||
def _inv_dist(self, X, X2=None):
|
def _inv_dist(self, X, X2=None):
|
||||||
"""
|
"""
|
||||||
Compute the elementwise inverse of the distance matrix, expecpt on the
|
Compute the elementwise inverse of the distance matrix, expecpt on the
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue