Combination Kernel for add and prod

This commit is contained in:
Max Zwiessele 2014-03-11 10:24:15 +00:00
parent 7e9078b0f9
commit 2d8246d33f
3 changed files with 94 additions and 91 deletions

View file

@ -5,40 +5,19 @@ import numpy as np
import itertools import itertools
from ...core.parameterization import Parameterized from ...core.parameterization import Parameterized
from ...util.caching import Cache_this from ...util.caching import Cache_this
from kern import Kern from kern import CombinationKernel
class Add(Kern): class Add(CombinationKernel):
def __init__(self, subkerns, tensor): """
assert all([isinstance(k, Kern) for k in subkerns]) Add given list of kernels together.
if tensor: propagates gradients through.
input_dim = sum([k.input_dim for k in subkerns]) """
self.input_slices = [] def __init__(self, subkerns, name='add'):
n = 0 super(Add, self).__init__(subkerns, name)
for k in subkerns:
self.input_slices.append(slice(n, n+k.input_dim))
n += k.input_dim
else:
assert all([k.input_dim == subkerns[0].input_dim for k in subkerns])
input_dim = subkerns[0].input_dim
self.input_slices = [slice(None) for k in subkerns]
super(Add, self).__init__(input_dim, 'add')
self.add_parameters(*subkerns)
@property @Cache_this(limit=2, force_kwargs=['which_parts'])
def parts(self): def K(self, X, X2=None, which_parts=None):
return self._parameters_
def K(self, X, X2=None):
"""
Compute the kernel function.
:param X: the first set of inputs to the kernel
:param X2: (optional) the second set of arguments to the kernel. If X2
is None, this is passed through to the 'part' object, which
handles this as X2 == X.
"""
assert X.shape[1] == self.input_dim assert X.shape[1] == self.input_dim
which_parts=None
if which_parts is None: if which_parts is None:
which_parts = self.parts which_parts = self.parts
elif not isinstance(which_parts, (list, tuple)): elif not isinstance(which_parts, (list, tuple)):
@ -46,12 +25,6 @@ class Add(Kern):
which_parts = [which_parts] which_parts = [which_parts]
return sum([p.K(X, X2) for p in which_parts]) return sum([p.K(X, X2) for p in which_parts])
def update_gradients_full(self, dL_dK, X, X2=None):
[p.update_gradients_full(dL_dK, X, X2) for p in self.parts]
def update_gradients_diag(self, dL_dK, X):
[p.update_gradients_diag(dL_dK, X) for p in self.parts]
def gradients_X(self, dL_dK, X, X2=None): def gradients_X(self, dL_dK, X, X2=None):
"""Compute the gradient of the objective function with respect to X. """Compute the gradient of the objective function with respect to X.
@ -67,8 +40,8 @@ class Add(Kern):
target[:, p.active_dims] += p.gradients_X(dL_dK, X, X2) target[:, p.active_dims] += p.gradients_X(dL_dK, X, X2)
return target return target
def Kdiag(self, X): @Cache_this(limit=2, force_kwargs=['which_parts'])
which_parts=None def Kdiag(self, X, which_parts=None):
assert X.shape[1] == self.input_dim assert X.shape[1] == self.input_dim
if which_parts is None: if which_parts is None:
which_parts = self.parts which_parts = self.parts

View file

@ -2,6 +2,7 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
import sys import sys
import numpy as np
from ...core.parameterization.parameterized import ParametersChangedMeta, Parameterized from ...core.parameterization.parameterized import ParametersChangedMeta, Parameterized
from ...util.caching import Cache_this from ...util.caching import Cache_this
@ -14,8 +15,11 @@ class KernCallsViaSlicerMeta(ParametersChangedMeta):
instance.update_gradients_diag = instance._slice_wrapper(instance.update_gradients_diag, True, True) instance.update_gradients_diag = instance._slice_wrapper(instance.update_gradients_diag, True, True)
instance.gradients_X = instance._slice_wrapper(instance.gradients_X, False, True) instance.gradients_X = instance._slice_wrapper(instance.gradients_X, False, True)
instance.gradients_X_diag = instance._slice_wrapper(instance.gradients_X_diag, True, True) instance.gradients_X_diag = instance._slice_wrapper(instance.gradients_X_diag, True, True)
instance.psi0 = instance._slice_wrapper(instance.psi0, False, False)
instance.psi1 = instance._slice_wrapper(instance.psi1, False, False)
instance.psi2 = instance._slice_wrapper(instance.psi2, False, False)
return instance return instance
class Kern(Parameterized): class Kern(Parameterized):
__metaclass__ = KernCallsViaSlicerMeta __metaclass__ = KernCallsViaSlicerMeta
def __init__(self, input_dim, name, *a, **kw): def __init__(self, input_dim, name, *a, **kw):
@ -37,11 +41,11 @@ class Kern(Parameterized):
self.input_dim = len(self.active_dims) self.input_dim = len(self.active_dims)
self._sliced_X = False self._sliced_X = False
self._sliced_X2 = False self._sliced_X2 = False
@Cache_this(limit=10, ignore_args = (0,)) @Cache_this(limit=10)#, ignore_args = (0,))
def _slice_X(self, X): def _slice_X(self, X):
return X[:, self.active_dims] return X[:, self.active_dims]
def _slice_wrapper(self, operation, diag=False, derivative=False): def _slice_wrapper(self, operation, diag=False, derivative=False):
""" """
This method wraps the functions in kernel to make sure all kernels always see their respective input dimension. This method wraps the functions in kernel to make sure all kernels always see their respective input dimension.
@ -56,7 +60,8 @@ class Kern(Parameterized):
self._sliced_X = True self._sliced_X = True
try: try:
ret = operation(dL_dK, X, *args, **kw) ret = operation(dL_dK, X, *args, **kw)
except: raise except:
raise
finally: finally:
self._sliced_X = False self._sliced_X = False
return ret return ret
@ -67,7 +72,8 @@ class Kern(Parameterized):
self._sliced_X2 = True self._sliced_X2 = True
try: try:
ret = operation(dL_dK, X, X2, *args, **kw) ret = operation(dL_dK, X, X2, *args, **kw)
except: raise except:
raise
finally: finally:
self._sliced_X = False self._sliced_X = False
self._sliced_X2 = False self._sliced_X2 = False
@ -79,7 +85,8 @@ class Kern(Parameterized):
self._sliced_X = True self._sliced_X = True
try: try:
ret = operation(X, *args, **kw) ret = operation(X, *args, **kw)
except: raise except:
raise
finally: finally:
self._sliced_X = False self._sliced_X = False
return ret return ret
@ -100,10 +107,18 @@ class Kern(Parameterized):
+(","+str(bool(diag)) if diag else'') +(","+str(bool(diag)) if diag else'')
+(','+str(bool(derivative)) if derivative else '') +(','+str(bool(derivative)) if derivative else '')
+')') +')')
x_slice_wrapper.__doc__ = "**sliced**\n\n" + (operation.__doc__ or "") x_slice_wrapper.__doc__ = "**sliced**\n" + (operation.__doc__ or "")
return x_slice_wrapper return x_slice_wrapper
def K(self, X, X2): def K(self, X, X2):
"""
Compute the kernel function.
:param X: the first set of inputs to the kernel
:param X2: (optional) the second set of arguments to the kernel. If X2
is None, this is passed through to the 'part' object, which
handles this as X2 == X.
"""
raise NotImplementedError raise NotImplementedError
def Kdiag(self, X): def Kdiag(self, X):
raise NotImplementedError raise NotImplementedError
@ -179,17 +194,10 @@ class Kern(Parameterized):
""" Overloading of the '+' operator. for more control, see self.add """ """ Overloading of the '+' operator. for more control, see self.add """
return self.add(other) return self.add(other)
def add(self, other, tensor=False): def add(self, other, name='add'):
""" """
Add another kernel to this one. Add another kernel to this one.
If Tensor is False, both kernels are defined on the same _space_. then
the created kernel will have the same number of inputs as self and
other (which must be the same).
If Tensor is True, then the dimensions are stacked 'horizontally', so
that the resulting kernel has self.input_dim + other.input_dim
:param other: the other kernel to be added :param other: the other kernel to be added
:type other: GPy.kern :type other: GPy.kern
@ -197,23 +205,23 @@ class Kern(Parameterized):
assert isinstance(other, Kern), "only kernels can be added to kernels..." assert isinstance(other, Kern), "only kernels can be added to kernels..."
from add import Add from add import Add
kernels = [] kernels = []
if not tensor and isinstance(self, Add): kernels.extend(self._parameters_) if isinstance(self, Add): kernels.extend(self._parameters_)
else: kernels.append(self) else: kernels.append(self)
if not tensor and isinstance(other, Add): kernels.extend(other._parameters_) if isinstance(other, Add): kernels.extend(other._parameters_)
else: kernels.append(other) else: kernels.append(other)
return Add(kernels, tensor) return Add(kernels, name=name)
def __mul__(self, other): def __mul__(self, other):
""" Here we overload the '*' operator. See self.prod for more information""" """ Here we overload the '*' operator. See self.prod for more information"""
return self.prod(other) return self.prod(other)
def __pow__(self, other): #def __pow__(self, other):
""" # """
Shortcut for tensor `prod`. # Shortcut for tensor `prod`.
""" # """
return self.prod(other, tensor=True) # return self.prod(other, tensor=True)
def prod(self, other, tensor=False, name=None): def prod(self, other, name=None):
""" """
Multiply two kernels (either on the same space, or on the tensor Multiply two kernels (either on the same space, or on the tensor
product of the input space). product of the input space).
@ -226,4 +234,27 @@ class Kern(Parameterized):
""" """
assert isinstance(other, Kern), "only kernels can be added to kernels..." assert isinstance(other, Kern), "only kernels can be added to kernels..."
from prod import Prod from prod import Prod
return Prod(self, other, tensor, name) kernels = []
if isinstance(self, Prod): kernels.extend(self._parameters_)
else: kernels.append(self)
if isinstance(other, Prod): kernels.extend(other._parameters_)
else: kernels.append(other)
return Prod(self, other, name)
class CombinationKernel(Kern):
def __init__(self, kernels, name):
assert all([isinstance(k, Kern) for k in kernels])
input_dim = reduce(np.union1d, (np.r_[x.active_dims] for x in kernels))
super(CombinationKernel, self).__init__(input_dim, name)
self.add_parameters(*kernels)
@property
def parts(self):
return self._parameters_
def update_gradients_full(self, dL_dK, X, X2=None):
[p.update_gradients_full(dL_dK, X, X2) for p in self.parts]
def update_gradients_diag(self, dL_dK, X):
[p.update_gradients_diag(dL_dK, X) for p in self.parts]

View file

@ -1,10 +1,12 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt). # Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
from kern import Kern
import numpy as np import numpy as np
from kern import CombinationKernel
from ...util.caching import Cache_this
import itertools
class Prod(Kern): class Prod(CombinationKernel):
""" """
Computes the product of 2 kernels Computes the product of 2 kernels
@ -15,34 +17,31 @@ class Prod(Kern):
:rtype: kernel object :rtype: kernel object
""" """
def __init__(self, k1, k2, tensor=False,name=None): def __init__(self, kernels, name='prod'):
if tensor: super(Prod, self).__init__(kernels, name)
name = k1.name + '_xx_' + k2.name if name is None else name
super(Prod, self).__init__(k1.input_dim + k2.input_dim, name)
self.slice1 = slice(0,k1.input_dim)
self.slice2 = slice(k1.input_dim,k1.input_dim+k2.input_dim)
else:
assert k1.input_dim == k2.input_dim, "Error: The input spaces of the kernels to multiply don't have the same dimension."
name = k1.name + '_x_' + k2.name if name is None else name
super(Prod, self).__init__(k1.input_dim, name)
self.slice1 = slice(0, self.input_dim)
self.slice2 = slice(0, self.input_dim)
self.k1 = k1
self.k2 = k2
self.add_parameters(self.k1, self.k2)
def K(self, X, X2=None): @Cache_this(limit=2, force_kwargs=['which_parts'])
if X2 is None: def K(self, X, X2=None, which_parts=None):
return self.k1.K(X[:,self.slice1], None) * self.k2.K(X[:,self.slice2], None) assert X.shape[1] == self.input_dim
else: if which_parts is None:
return self.k1.K(X[:,self.slice1], X2[:,self.slice1]) * self.k2.K(X[:,self.slice2], X2[:,self.slice2]) which_parts = self.parts
elif not isinstance(which_parts, (list, tuple)):
# if only one part is given
which_parts = [which_parts]
return reduce(np.multiply, (p.K(X, X2) for p in which_parts))
def Kdiag(self, X): @Cache_this(limit=2, force_kwargs=['which_parts'])
return self.k1.Kdiag(X[:,self.slice1]) * self.k2.Kdiag(X[:,self.slice2]) def Kdiag(self, X, which_parts=None):
assert X.shape[1] == self.input_dim
if which_parts is None:
which_parts = self.parts
return reduce(np.multiply, (p.Kdiag(X) for p in which_parts))
def update_gradients_full(self, dL_dK, X): def update_gradients_full(self, dL_dK, X):
self.k1.update_gradients_full(dL_dK*self.k2.K(X[:,self.slice2]), X[:,self.slice1]) for k1,k2 in itertools.combinations(self.parts, 2):
self.k2.update_gradients_full(dL_dK*self.k1.K(X[:,self.slice1]), X[:,self.slice2]) k1._sliced_X = k1._sliced_X2 = k2._sliced_X = k2._sliced_X2 = True
k1.update_gradients_full(dL_dK*k2.K(X, X)
self.k2.update_gradients_full(dL_dK*self.k1.K(X[:,self.slice1]), X[:,self.slice2])
def gradients_X(self, dL_dK, X, X2=None): def gradients_X(self, dL_dK, X, X2=None):
target = np.zeros(X.shape) target = np.zeros(X.shape)