From 2d8246d33f779823ba4b5bf8060c855c888f5147 Mon Sep 17 00:00:00 2001
From: Max Zwiessele
Date: Tue, 11 Mar 2014 10:24:15 +0000
Subject: [PATCH] Combination kernel for Add and Prod

---
 GPy/kern/_src/add.py  | 51 +++++++------------------
 GPy/kern/_src/kern.py | 83 +++++++++++++++++++++++++++++--------------
 GPy/kern/_src/prod.py | 51 +++++++++++++-------------
 3 files changed, 94 insertions(+), 91 deletions(-)

diff --git a/GPy/kern/_src/add.py b/GPy/kern/_src/add.py
index 604ed103..433a8921 100644
--- a/GPy/kern/_src/add.py
+++ b/GPy/kern/_src/add.py
@@ -5,40 +5,19 @@ import numpy as np
 import itertools
 from ...core.parameterization import Parameterized
 from ...util.caching import Cache_this
-from kern import Kern
+from kern import CombinationKernel
 
-class Add(Kern):
-    def __init__(self, subkerns, tensor):
-        assert all([isinstance(k, Kern) for k in subkerns])
-        if tensor:
-            input_dim = sum([k.input_dim for k in subkerns])
-            self.input_slices = []
-            n = 0
-            for k in subkerns:
-                self.input_slices.append(slice(n, n+k.input_dim))
-                n += k.input_dim
-        else:
-            assert all([k.input_dim == subkerns[0].input_dim for k in subkerns])
-            input_dim = subkerns[0].input_dim
-            self.input_slices = [slice(None) for k in subkerns]
-        super(Add, self).__init__(input_dim, 'add')
-        self.add_parameters(*subkerns)
+class Add(CombinationKernel):
+    """
+    Add the given list of kernels together and propagate
+    gradients through to the parts.
+    """
+    def __init__(self, subkerns, name='add'):
+        super(Add, self).__init__(subkerns, name)
 
-    @property
-    def parts(self):
-        return self._parameters_
-
-    def K(self, X, X2=None):
-        """
-        Compute the kernel function.
-
-        :param X: the first set of inputs to the kernel
-        :param X2: (optional) the second set of arguments to the kernel. If X2
-            is None, this is passed throgh to the 'part' object, which
-            handLes this as X2 == X.
-        """
+    @Cache_this(limit=2, force_kwargs=['which_parts'])
+    def K(self, X, X2=None, which_parts=None):
         assert X.shape[1] == self.input_dim
-        which_parts=None
         if which_parts is None:
             which_parts = self.parts
         elif not isinstance(which_parts, (list, tuple)):
@@ -46,12 +25,6 @@ class Add(Kern):
             which_parts = [which_parts]
         return sum([p.K(X, X2) for p in which_parts])
 
-    def update_gradients_full(self, dL_dK, X, X2=None):
-        [p.update_gradients_full(dL_dK, X, X2) for p in self.parts]
-
-    def update_gradients_diag(self, dL_dK, X):
-        [p.update_gradients_diag(dL_dK, X) for p in self.parts]
-
     def gradients_X(self, dL_dK, X, X2=None):
         """Compute the gradient of the objective function with respect to X.
@@ -67,8 +40,8 @@ class Add(Kern):
             target[:, p.active_dims] += p.gradients_X(dL_dK, X, X2)
         return target
 
-    def Kdiag(self, X):
-        which_parts=None
+    @Cache_this(limit=2, force_kwargs=['which_parts'])
+    def Kdiag(self, X, which_parts=None):
         assert X.shape[1] == self.input_dim
         if which_parts is None:
             which_parts = self.parts
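The new which_parts keyword lets callers evaluate only a subset of an Add kernel's parts instead of the full sum. A minimal usage sketch, assuming two standard GPy kernels are available under these names (the exact constructors on this branch may differ):

    import numpy as np
    import GPy

    k = GPy.kern.RBF(2) + GPy.kern.Bias(2)   # __add__ builds an Add kernel
    X = np.random.randn(10, 2)
    K_full = k.K(X)                           # sum over all parts
    K_rbf = k.K(X, which_parts=k.parts[0])    # RBF contribution only
    K_bias = k.K(X, which_parts=k.parts[1])   # Bias contribution only
    assert np.allclose(K_full, K_rbf + K_bias)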
diff --git a/GPy/kern/_src/kern.py b/GPy/kern/_src/kern.py
index 96bab646..a1106241 100644
--- a/GPy/kern/_src/kern.py
+++ b/GPy/kern/_src/kern.py
@@ -2,6 +2,7 @@
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
 
 import sys
+import numpy as np
 from ...core.parameterization.parameterized import ParametersChangedMeta, Parameterized
 from ...util.caching import Cache_this
 
@@ -14,8 +15,11 @@ class KernCallsViaSlicerMeta(ParametersChangedMeta):
         instance.update_gradients_diag = instance._slice_wrapper(instance.update_gradients_diag, True, True)
         instance.gradients_X = instance._slice_wrapper(instance.gradients_X, False, True)
         instance.gradients_X_diag = instance._slice_wrapper(instance.gradients_X_diag, True, True)
+        instance.psi0 = instance._slice_wrapper(instance.psi0, False, False)
+        instance.psi1 = instance._slice_wrapper(instance.psi1, False, False)
+        instance.psi2 = instance._slice_wrapper(instance.psi2, False, False)
         return instance
-    
+
 class Kern(Parameterized):
     __metaclass__ = KernCallsViaSlicerMeta
     def __init__(self, input_dim, name, *a, **kw):
@@ -37,11 +41,11 @@ class Kern(Parameterized):
         self.input_dim = len(self.active_dims)
         self._sliced_X = False
         self._sliced_X2 = False
-        
-    @Cache_this(limit=10, ignore_args = (0,))
+
+    @Cache_this(limit=10)
     def _slice_X(self, X):
         return X[:, self.active_dims]
-    
+
     def _slice_wrapper(self, operation, diag=False, derivative=False):
         """
         This method wraps the functions in kernel to make sure all kernels allways see their respective input dimension.
@@ -56,7 +60,8 @@ class Kern(Parameterized):
             self._sliced_X = True
             try:
                 ret = operation(dL_dK, X, *args, **kw)
-            except: raise
+            except:
+                raise
             finally:
                 self._sliced_X = False
             return ret
@@ -67,7 +72,8 @@ class Kern(Parameterized):
                 self._sliced_X2 = True
                 try:
                     ret = operation(dL_dK, X, X2, *args, **kw)
-                except: raise
+                except:
+                    raise
                 finally:
                     self._sliced_X = False
                     self._sliced_X2 = False
@@ -79,7 +85,8 @@ class Kern(Parameterized):
             self._sliced_X = True
             try:
                 ret = operation(X, *args, **kw)
-            except: raise
+            except:
+                raise
             finally:
                 self._sliced_X = False
             return ret
@@ -100,10 +107,18 @@ class Kern(Parameterized):
                 +(","+str(bool(diag)) if diag else'')
                 +(','+str(bool(derivative)) if derivative else '')
                 +')')
-        x_slice_wrapper.__doc__ = "**sliced**\n\n" + (operation.__doc__ or "")
+        x_slice_wrapper.__doc__ = "**sliced**\n" + (operation.__doc__ or "")
         return x_slice_wrapper
 
     def K(self, X, X2):
+        """
+        Compute the kernel function.
+
+        :param X: the first set of inputs to the kernel
+        :param X2: (optional) the second set of arguments to the kernel. If X2
+            is None, this is passed through to the 'part' object, which
+            handles this as X2 == X.
+        """
         raise NotImplementedError
     def Kdiag(self, X):
         raise NotImplementedError
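The newly wrapped psi0, psi1 and psi2 follow the same slicing contract as K and Kdiag: every public kernel method effectively receives only the columns of X named by active_dims. A standalone sketch of that contract in plain numpy (not GPy's actual wrapper code):

    import numpy as np

    active_dims = np.array([0, 2])   # kernel acts on dims 0 and 2 of a 4-d input
    X = np.random.randn(5, 4)
    X_sliced = X[:, active_dims]     # what K, Kdiag, psi0..psi2 effectively see
    assert X_sliced.shape == (5, 2)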
@@ -179,17 +194,10 @@ class Kern(Parameterized):
         """ Overloading of the '+' operator. for more control, see self.add """
         return self.add(other)
 
-    def add(self, other, tensor=False):
+    def add(self, other, name='add'):
         """
         Add another kernel to this one.
-
-        If Tensor is False, both kernels are defined on the same _space_. then
-        the created kernel will have the same number of inputs as self and
-        other (which must be the same).
-
-        If Tensor is True, then the dimensions are stacked 'horizontally', so
-        that the resulting kernel has self.input_dim + other.input_dim
-
         :param other: the other kernel to be added
         :type other: GPy.kern
         """
         assert isinstance(other, Kern), "only kernels can be added to kernels..."
         from add import Add
         kernels = []
-        if not tensor and isinstance(self, Add): kernels.extend(self._parameters_)
+        if isinstance(self, Add): kernels.extend(self._parameters_)
         else: kernels.append(self)
-        if not tensor and isinstance(other, Add): kernels.extend(other._parameters_)
+        if isinstance(other, Add): kernels.extend(other._parameters_)
         else: kernels.append(other)
-        return Add(kernels, tensor)
+        return Add(kernels, name=name)
 
     def __mul__(self, other):
         """ Here we overload the '*' operator. See self.prod for more information"""
         return self.prod(other)
 
-    def __pow__(self, other):
-        """
-        Shortcut for tensor `prod`.
-        """
-        return self.prod(other, tensor=True)
+    #def __pow__(self, other):
+    #    """
+    #    Shortcut for tensor `prod`.
+    #    """
+    #    return self.prod(other, tensor=True)
 
-    def prod(self, other, tensor=False, name=None):
+    def prod(self, other, name='prod'):
         """
-        Multiply two kernels (either on the same space, or on the tensor
-        product of the input space).
+        Multiply this kernel by another one (both defined on the same space).
 
         :param other: the other kernel to be multiplied
         :type other: GPy.kern
         """
-        assert isinstance(other, Kern), "only kernels can be added to kernels..."
+        assert isinstance(other, Kern), "only kernels can be multiplied with kernels..."
         from prod import Prod
-        return Prod(self, other, tensor, name)
+        kernels = []
+        if isinstance(self, Prod): kernels.extend(self._parameters_)
+        else: kernels.append(self)
+        if isinstance(other, Prod): kernels.extend(other._parameters_)
+        else: kernels.append(other)
+        return Prod(kernels, name=name)
+
+
+class CombinationKernel(Kern):
+    """
+    Abstract base for kernels which combine other kernels (e.g. Add, Prod).
+    The combined kernel acts on the union of the parts' active dimensions
+    and holds the parts as its parameters.
+    """
+    def __init__(self, kernels, name):
+        assert all([isinstance(k, Kern) for k in kernels])
+        # the combination is defined on the union of the parts' active_dims
+        input_dim = reduce(np.union1d, (np.r_[x.active_dims] for x in kernels))
+        super(CombinationKernel, self).__init__(input_dim, name)
+        self.add_parameters(*kernels)
+
+    @property
+    def parts(self):
+        return self._parameters_
+
+    def update_gradients_full(self, dL_dK, X, X2=None):
+        [p.update_gradients_full(dL_dK, X, X2) for p in self.parts]
+
+    def update_gradients_diag(self, dL_dK, X):
+        [p.update_gradients_diag(dL_dK, X) for p in self.parts]
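Because add() and prod() now extend an existing combination's parts instead of nesting combinations, chained operators produce one flat CombinationKernel. A sketch, assuming k1, k2 and k3 are plain (non-combination) Kern instances on the same space:

    k = k1 + k2 + k3        # one Add with three parts, not Add(Add(k1, k2), k3)
    assert len(k.parts) == 3
    p = k1 * k2 * k3        # likewise one Prod with three parts
    assert len(p.parts) == 3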
diff --git a/GPy/kern/_src/prod.py b/GPy/kern/_src/prod.py
index 51490687..77b2ea51 100644
--- a/GPy/kern/_src/prod.py
+++ b/GPy/kern/_src/prod.py
@@ -1,10 +1,12 @@
 # Copyright (c) 2012, GPy authors (see AUTHORS.txt).
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
 
-from kern import Kern
 import numpy as np
+from kern import CombinationKernel
+from ...util.caching import Cache_this
+import itertools
 
-class Prod(Kern):
+class Prod(CombinationKernel):
     """
-    Computes the product of 2 kernels
+    Computes the elementwise product of the given kernels.
@@ -15,34 +17,31 @@
     :rtype: kernel object
     """
-    def __init__(self, k1, k2, tensor=False,name=None):
-        if tensor:
-            name = k1.name + '_xx_' + k2.name if name is None else name
-            super(Prod, self).__init__(k1.input_dim + k2.input_dim, name)
-            self.slice1 = slice(0,k1.input_dim)
-            self.slice2 = slice(k1.input_dim,k1.input_dim+k2.input_dim)
-        else:
-            assert k1.input_dim == k2.input_dim, "Error: The input spaces of the kernels to multiply don't have the same dimension."
-            name = k1.name + '_x_' + k2.name if name is None else name
-            super(Prod, self).__init__(k1.input_dim, name)
-            self.slice1 = slice(0, self.input_dim)
-            self.slice2 = slice(0, self.input_dim)
-        self.k1 = k1
-        self.k2 = k2
-        self.add_parameters(self.k1, self.k2)
+    def __init__(self, kernels, name='prod'):
+        super(Prod, self).__init__(kernels, name)
 
-    def K(self, X, X2=None):
-        if X2 is None:
-            return self.k1.K(X[:,self.slice1], None) * self.k2.K(X[:,self.slice2], None)
-        else:
-            return self.k1.K(X[:,self.slice1], X2[:,self.slice1]) * self.k2.K(X[:,self.slice2], X2[:,self.slice2])
+    @Cache_this(limit=2, force_kwargs=['which_parts'])
+    def K(self, X, X2=None, which_parts=None):
+        assert X.shape[1] == self.input_dim
+        if which_parts is None:
+            which_parts = self.parts
+        elif not isinstance(which_parts, (list, tuple)):
+            # if only one part is given
+            which_parts = [which_parts]
+        return reduce(np.multiply, (p.K(X, X2) for p in which_parts))
 
-    def Kdiag(self, X):
-        return self.k1.Kdiag(X[:,self.slice1]) * self.k2.Kdiag(X[:,self.slice2])
+    @Cache_this(limit=2, force_kwargs=['which_parts'])
+    def Kdiag(self, X, which_parts=None):
+        assert X.shape[1] == self.input_dim
+        if which_parts is None:
+            which_parts = self.parts
+        return reduce(np.multiply, (p.Kdiag(X) for p in which_parts))
 
     def update_gradients_full(self, dL_dK, X):
-        self.k1.update_gradients_full(dL_dK*self.k2.K(X[:,self.slice2]), X[:,self.slice1])
-        self.k2.update_gradients_full(dL_dK*self.k1.K(X[:,self.slice1]), X[:,self.slice2])
+        # product rule: each part's gradient sees dL_dK times the product
+        # of all the other parts' kernel matrices
+        for p in self.parts:
+            K_others = reduce(np.multiply, (k.K(X) for k in self.parts if k is not p))
+            p.update_gradients_full(dL_dK * K_others, X)
 
     def gradients_X(self, dL_dK, X, X2=None):
         target = np.zeros(X.shape)
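The product rule that update_gradients_full relies on can be checked directly in numpy: with K = K_1 * K_2 elementwise, the gradient factor routed to part i is dL_dK times the product of the other parts. A self-contained check using scalar stand-in parameters a and b (illustrative only, not GPy code):

    import numpy as np

    rng = np.random.RandomState(0)
    B1, B2, dL_dK = rng.randn(3, 4, 4)
    a, b = 0.7, 1.3
    # K(a, b) = (a*B1) * (b*B2) elementwise, so dK/da = B1 * (b*B2)
    dL_da = np.sum(dL_dK * B1 * (b * B2))
    # finite-difference check of the product rule
    eps = 1e-6
    fd = (np.sum(dL_dK * ((a + eps) * B1) * (b * B2))
          - np.sum(dL_dK * ((a - eps) * B1) * (b * B2))) / (2 * eps)
    assert np.isclose(dL_da, fd)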