mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-05-09 03:52:39 +02:00
Combination Kernel for add and prod
This commit is contained in:
parent
7e9078b0f9
commit
2d8246d33f
3 changed files with 94 additions and 91 deletions
|
|
@ -5,40 +5,19 @@ import numpy as np
|
||||||
import itertools
|
import itertools
|
||||||
from ...core.parameterization import Parameterized
|
from ...core.parameterization import Parameterized
|
||||||
from ...util.caching import Cache_this
|
from ...util.caching import Cache_this
|
||||||
from kern import Kern
|
from kern import CombinationKernel
|
||||||
|
|
||||||
class Add(Kern):
|
class Add(CombinationKernel):
|
||||||
def __init__(self, subkerns, tensor):
|
|
||||||
assert all([isinstance(k, Kern) for k in subkerns])
|
|
||||||
if tensor:
|
|
||||||
input_dim = sum([k.input_dim for k in subkerns])
|
|
||||||
self.input_slices = []
|
|
||||||
n = 0
|
|
||||||
for k in subkerns:
|
|
||||||
self.input_slices.append(slice(n, n+k.input_dim))
|
|
||||||
n += k.input_dim
|
|
||||||
else:
|
|
||||||
assert all([k.input_dim == subkerns[0].input_dim for k in subkerns])
|
|
||||||
input_dim = subkerns[0].input_dim
|
|
||||||
self.input_slices = [slice(None) for k in subkerns]
|
|
||||||
super(Add, self).__init__(input_dim, 'add')
|
|
||||||
self.add_parameters(*subkerns)
|
|
||||||
|
|
||||||
@property
|
|
||||||
def parts(self):
|
|
||||||
return self._parameters_
|
|
||||||
|
|
||||||
def K(self, X, X2=None):
|
|
||||||
"""
|
"""
|
||||||
Compute the kernel function.
|
Add given list of kernels together.
|
||||||
|
propagates gradients thorugh.
|
||||||
:param X: the first set of inputs to the kernel
|
|
||||||
:param X2: (optional) the second set of arguments to the kernel. If X2
|
|
||||||
is None, this is passed throgh to the 'part' object, which
|
|
||||||
handLes this as X2 == X.
|
|
||||||
"""
|
"""
|
||||||
|
def __init__(self, subkerns, name='add'):
|
||||||
|
super(Add, self).__init__(subkerns, name)
|
||||||
|
|
||||||
|
@Cache_this(limit=2, force_kwargs=['which_parts'])
|
||||||
|
def K(self, X, X2=None, which_parts=None):
|
||||||
assert X.shape[1] == self.input_dim
|
assert X.shape[1] == self.input_dim
|
||||||
which_parts=None
|
|
||||||
if which_parts is None:
|
if which_parts is None:
|
||||||
which_parts = self.parts
|
which_parts = self.parts
|
||||||
elif not isinstance(which_parts, (list, tuple)):
|
elif not isinstance(which_parts, (list, tuple)):
|
||||||
|
|
@ -46,12 +25,6 @@ class Add(Kern):
|
||||||
which_parts = [which_parts]
|
which_parts = [which_parts]
|
||||||
return sum([p.K(X, X2) for p in which_parts])
|
return sum([p.K(X, X2) for p in which_parts])
|
||||||
|
|
||||||
def update_gradients_full(self, dL_dK, X, X2=None):
|
|
||||||
[p.update_gradients_full(dL_dK, X, X2) for p in self.parts]
|
|
||||||
|
|
||||||
def update_gradients_diag(self, dL_dK, X):
|
|
||||||
[p.update_gradients_diag(dL_dK, X) for p in self.parts]
|
|
||||||
|
|
||||||
def gradients_X(self, dL_dK, X, X2=None):
|
def gradients_X(self, dL_dK, X, X2=None):
|
||||||
"""Compute the gradient of the objective function with respect to X.
|
"""Compute the gradient of the objective function with respect to X.
|
||||||
|
|
||||||
|
|
@ -67,8 +40,8 @@ class Add(Kern):
|
||||||
target[:, p.active_dims] += p.gradients_X(dL_dK, X, X2)
|
target[:, p.active_dims] += p.gradients_X(dL_dK, X, X2)
|
||||||
return target
|
return target
|
||||||
|
|
||||||
def Kdiag(self, X):
|
@Cache_this(limit=2, force_kwargs=['which_parts'])
|
||||||
which_parts=None
|
def Kdiag(self, X, which_parts=None):
|
||||||
assert X.shape[1] == self.input_dim
|
assert X.shape[1] == self.input_dim
|
||||||
if which_parts is None:
|
if which_parts is None:
|
||||||
which_parts = self.parts
|
which_parts = self.parts
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,7 @@
|
||||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||||
|
|
||||||
import sys
|
import sys
|
||||||
|
import numpy as np
|
||||||
from ...core.parameterization.parameterized import ParametersChangedMeta, Parameterized
|
from ...core.parameterization.parameterized import ParametersChangedMeta, Parameterized
|
||||||
from ...util.caching import Cache_this
|
from ...util.caching import Cache_this
|
||||||
|
|
||||||
|
|
@ -14,6 +15,9 @@ class KernCallsViaSlicerMeta(ParametersChangedMeta):
|
||||||
instance.update_gradients_diag = instance._slice_wrapper(instance.update_gradients_diag, True, True)
|
instance.update_gradients_diag = instance._slice_wrapper(instance.update_gradients_diag, True, True)
|
||||||
instance.gradients_X = instance._slice_wrapper(instance.gradients_X, False, True)
|
instance.gradients_X = instance._slice_wrapper(instance.gradients_X, False, True)
|
||||||
instance.gradients_X_diag = instance._slice_wrapper(instance.gradients_X_diag, True, True)
|
instance.gradients_X_diag = instance._slice_wrapper(instance.gradients_X_diag, True, True)
|
||||||
|
instance.psi0 = instance._slice_wrapper(instance.psi0, False, False)
|
||||||
|
instance.psi1 = instance._slice_wrapper(instance.psi1, False, False)
|
||||||
|
instance.psi2 = instance._slice_wrapper(instance.psi2, False, False)
|
||||||
return instance
|
return instance
|
||||||
|
|
||||||
class Kern(Parameterized):
|
class Kern(Parameterized):
|
||||||
|
|
@ -38,7 +42,7 @@ class Kern(Parameterized):
|
||||||
self._sliced_X = False
|
self._sliced_X = False
|
||||||
self._sliced_X2 = False
|
self._sliced_X2 = False
|
||||||
|
|
||||||
@Cache_this(limit=10, ignore_args = (0,))
|
@Cache_this(limit=10)#, ignore_args = (0,))
|
||||||
def _slice_X(self, X):
|
def _slice_X(self, X):
|
||||||
return X[:, self.active_dims]
|
return X[:, self.active_dims]
|
||||||
|
|
||||||
|
|
@ -56,7 +60,8 @@ class Kern(Parameterized):
|
||||||
self._sliced_X = True
|
self._sliced_X = True
|
||||||
try:
|
try:
|
||||||
ret = operation(dL_dK, X, *args, **kw)
|
ret = operation(dL_dK, X, *args, **kw)
|
||||||
except: raise
|
except:
|
||||||
|
raise
|
||||||
finally:
|
finally:
|
||||||
self._sliced_X = False
|
self._sliced_X = False
|
||||||
return ret
|
return ret
|
||||||
|
|
@ -67,7 +72,8 @@ class Kern(Parameterized):
|
||||||
self._sliced_X2 = True
|
self._sliced_X2 = True
|
||||||
try:
|
try:
|
||||||
ret = operation(dL_dK, X, X2, *args, **kw)
|
ret = operation(dL_dK, X, X2, *args, **kw)
|
||||||
except: raise
|
except:
|
||||||
|
raise
|
||||||
finally:
|
finally:
|
||||||
self._sliced_X = False
|
self._sliced_X = False
|
||||||
self._sliced_X2 = False
|
self._sliced_X2 = False
|
||||||
|
|
@ -79,7 +85,8 @@ class Kern(Parameterized):
|
||||||
self._sliced_X = True
|
self._sliced_X = True
|
||||||
try:
|
try:
|
||||||
ret = operation(X, *args, **kw)
|
ret = operation(X, *args, **kw)
|
||||||
except: raise
|
except:
|
||||||
|
raise
|
||||||
finally:
|
finally:
|
||||||
self._sliced_X = False
|
self._sliced_X = False
|
||||||
return ret
|
return ret
|
||||||
|
|
@ -100,10 +107,18 @@ class Kern(Parameterized):
|
||||||
+(","+str(bool(diag)) if diag else'')
|
+(","+str(bool(diag)) if diag else'')
|
||||||
+(','+str(bool(derivative)) if derivative else '')
|
+(','+str(bool(derivative)) if derivative else '')
|
||||||
+')')
|
+')')
|
||||||
x_slice_wrapper.__doc__ = "**sliced**\n\n" + (operation.__doc__ or "")
|
x_slice_wrapper.__doc__ = "**sliced**\n" + (operation.__doc__ or "")
|
||||||
return x_slice_wrapper
|
return x_slice_wrapper
|
||||||
|
|
||||||
def K(self, X, X2):
|
def K(self, X, X2):
|
||||||
|
"""
|
||||||
|
Compute the kernel function.
|
||||||
|
|
||||||
|
:param X: the first set of inputs to the kernel
|
||||||
|
:param X2: (optional) the second set of arguments to the kernel. If X2
|
||||||
|
is None, this is passed throgh to the 'part' object, which
|
||||||
|
handLes this as X2 == X.
|
||||||
|
"""
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
def Kdiag(self, X):
|
def Kdiag(self, X):
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
@ -179,17 +194,10 @@ class Kern(Parameterized):
|
||||||
""" Overloading of the '+' operator. for more control, see self.add """
|
""" Overloading of the '+' operator. for more control, see self.add """
|
||||||
return self.add(other)
|
return self.add(other)
|
||||||
|
|
||||||
def add(self, other, tensor=False):
|
def add(self, other, name='add'):
|
||||||
"""
|
"""
|
||||||
Add another kernel to this one.
|
Add another kernel to this one.
|
||||||
|
|
||||||
If Tensor is False, both kernels are defined on the same _space_. then
|
|
||||||
the created kernel will have the same number of inputs as self and
|
|
||||||
other (which must be the same).
|
|
||||||
|
|
||||||
If Tensor is True, then the dimensions are stacked 'horizontally', so
|
|
||||||
that the resulting kernel has self.input_dim + other.input_dim
|
|
||||||
|
|
||||||
:param other: the other kernel to be added
|
:param other: the other kernel to be added
|
||||||
:type other: GPy.kern
|
:type other: GPy.kern
|
||||||
|
|
||||||
|
|
@ -197,23 +205,23 @@ class Kern(Parameterized):
|
||||||
assert isinstance(other, Kern), "only kernels can be added to kernels..."
|
assert isinstance(other, Kern), "only kernels can be added to kernels..."
|
||||||
from add import Add
|
from add import Add
|
||||||
kernels = []
|
kernels = []
|
||||||
if not tensor and isinstance(self, Add): kernels.extend(self._parameters_)
|
if isinstance(self, Add): kernels.extend(self._parameters_)
|
||||||
else: kernels.append(self)
|
else: kernels.append(self)
|
||||||
if not tensor and isinstance(other, Add): kernels.extend(other._parameters_)
|
if isinstance(other, Add): kernels.extend(other._parameters_)
|
||||||
else: kernels.append(other)
|
else: kernels.append(other)
|
||||||
return Add(kernels, tensor)
|
return Add(kernels, name=name)
|
||||||
|
|
||||||
def __mul__(self, other):
|
def __mul__(self, other):
|
||||||
""" Here we overload the '*' operator. See self.prod for more information"""
|
""" Here we overload the '*' operator. See self.prod for more information"""
|
||||||
return self.prod(other)
|
return self.prod(other)
|
||||||
|
|
||||||
def __pow__(self, other):
|
#def __pow__(self, other):
|
||||||
"""
|
# """
|
||||||
Shortcut for tensor `prod`.
|
# Shortcut for tensor `prod`.
|
||||||
"""
|
# """
|
||||||
return self.prod(other, tensor=True)
|
# return self.prod(other, tensor=True)
|
||||||
|
|
||||||
def prod(self, other, tensor=False, name=None):
|
def prod(self, other, name=None):
|
||||||
"""
|
"""
|
||||||
Multiply two kernels (either on the same space, or on the tensor
|
Multiply two kernels (either on the same space, or on the tensor
|
||||||
product of the input space).
|
product of the input space).
|
||||||
|
|
@ -226,4 +234,27 @@ class Kern(Parameterized):
|
||||||
"""
|
"""
|
||||||
assert isinstance(other, Kern), "only kernels can be added to kernels..."
|
assert isinstance(other, Kern), "only kernels can be added to kernels..."
|
||||||
from prod import Prod
|
from prod import Prod
|
||||||
return Prod(self, other, tensor, name)
|
kernels = []
|
||||||
|
if isinstance(self, Prod): kernels.extend(self._parameters_)
|
||||||
|
else: kernels.append(self)
|
||||||
|
if isinstance(other, Prod): kernels.extend(other._parameters_)
|
||||||
|
else: kernels.append(other)
|
||||||
|
return Prod(self, other, name)
|
||||||
|
|
||||||
|
|
||||||
|
class CombinationKernel(Kern):
|
||||||
|
def __init__(self, kernels, name):
|
||||||
|
assert all([isinstance(k, Kern) for k in kernels])
|
||||||
|
input_dim = reduce(np.union1d, (np.r_[x.active_dims] for x in kernels))
|
||||||
|
super(CombinationKernel, self).__init__(input_dim, name)
|
||||||
|
self.add_parameters(*kernels)
|
||||||
|
|
||||||
|
@property
|
||||||
|
def parts(self):
|
||||||
|
return self._parameters_
|
||||||
|
|
||||||
|
def update_gradients_full(self, dL_dK, X, X2=None):
|
||||||
|
[p.update_gradients_full(dL_dK, X, X2) for p in self.parts]
|
||||||
|
|
||||||
|
def update_gradients_diag(self, dL_dK, X):
|
||||||
|
[p.update_gradients_diag(dL_dK, X) for p in self.parts]
|
||||||
|
|
|
||||||
|
|
@ -1,10 +1,12 @@
|
||||||
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
|
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
|
||||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||||
|
|
||||||
from kern import Kern
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
from kern import CombinationKernel
|
||||||
|
from ...util.caching import Cache_this
|
||||||
|
import itertools
|
||||||
|
|
||||||
class Prod(Kern):
|
class Prod(CombinationKernel):
|
||||||
"""
|
"""
|
||||||
Computes the product of 2 kernels
|
Computes the product of 2 kernels
|
||||||
|
|
||||||
|
|
@ -15,33 +17,30 @@ class Prod(Kern):
|
||||||
:rtype: kernel object
|
:rtype: kernel object
|
||||||
|
|
||||||
"""
|
"""
|
||||||
def __init__(self, k1, k2, tensor=False,name=None):
|
def __init__(self, kernels, name='prod'):
|
||||||
if tensor:
|
super(Prod, self).__init__(kernels, name)
|
||||||
name = k1.name + '_xx_' + k2.name if name is None else name
|
|
||||||
super(Prod, self).__init__(k1.input_dim + k2.input_dim, name)
|
|
||||||
self.slice1 = slice(0,k1.input_dim)
|
|
||||||
self.slice2 = slice(k1.input_dim,k1.input_dim+k2.input_dim)
|
|
||||||
else:
|
|
||||||
assert k1.input_dim == k2.input_dim, "Error: The input spaces of the kernels to multiply don't have the same dimension."
|
|
||||||
name = k1.name + '_x_' + k2.name if name is None else name
|
|
||||||
super(Prod, self).__init__(k1.input_dim, name)
|
|
||||||
self.slice1 = slice(0, self.input_dim)
|
|
||||||
self.slice2 = slice(0, self.input_dim)
|
|
||||||
self.k1 = k1
|
|
||||||
self.k2 = k2
|
|
||||||
self.add_parameters(self.k1, self.k2)
|
|
||||||
|
|
||||||
def K(self, X, X2=None):
|
@Cache_this(limit=2, force_kwargs=['which_parts'])
|
||||||
if X2 is None:
|
def K(self, X, X2=None, which_parts=None):
|
||||||
return self.k1.K(X[:,self.slice1], None) * self.k2.K(X[:,self.slice2], None)
|
assert X.shape[1] == self.input_dim
|
||||||
else:
|
if which_parts is None:
|
||||||
return self.k1.K(X[:,self.slice1], X2[:,self.slice1]) * self.k2.K(X[:,self.slice2], X2[:,self.slice2])
|
which_parts = self.parts
|
||||||
|
elif not isinstance(which_parts, (list, tuple)):
|
||||||
|
# if only one part is given
|
||||||
|
which_parts = [which_parts]
|
||||||
|
return reduce(np.multiply, (p.K(X, X2) for p in which_parts))
|
||||||
|
|
||||||
def Kdiag(self, X):
|
@Cache_this(limit=2, force_kwargs=['which_parts'])
|
||||||
return self.k1.Kdiag(X[:,self.slice1]) * self.k2.Kdiag(X[:,self.slice2])
|
def Kdiag(self, X, which_parts=None):
|
||||||
|
assert X.shape[1] == self.input_dim
|
||||||
|
if which_parts is None:
|
||||||
|
which_parts = self.parts
|
||||||
|
return reduce(np.multiply, (p.Kdiag(X) for p in which_parts))
|
||||||
|
|
||||||
def update_gradients_full(self, dL_dK, X):
|
def update_gradients_full(self, dL_dK, X):
|
||||||
self.k1.update_gradients_full(dL_dK*self.k2.K(X[:,self.slice2]), X[:,self.slice1])
|
for k1,k2 in itertools.combinations(self.parts, 2):
|
||||||
|
k1._sliced_X = k1._sliced_X2 = k2._sliced_X = k2._sliced_X2 = True
|
||||||
|
k1.update_gradients_full(dL_dK*k2.K(X, X)
|
||||||
self.k2.update_gradients_full(dL_dK*self.k1.K(X[:,self.slice1]), X[:,self.slice2])
|
self.k2.update_gradients_full(dL_dK*self.k1.K(X[:,self.slice1]), X[:,self.slice2])
|
||||||
|
|
||||||
def gradients_X(self, dL_dK, X, X2=None):
|
def gradients_X(self, dL_dK, X, X2=None):
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue