unified framework for addition and product of kernels, with a tensor flag (boolean) instead of and

2026-05-18 13:55:14 +02:00 · 2013-05-10 17:48:11 +01:00 · 2013-05-10 17:48:11 +01:00 · 652b3ce2c6
commit 652b3ce2c6
parent 50b7958051
8 changed files with 132 additions and 168 deletions
--- a/GPy/kern/init.py
+++ b/GPy/kern/init.py
@ -2,7 +2,7 @@
 # Licensed under the BSD 3-clause license (see LICENSE.txt)


-from constructors import rbf, Matern32, Matern52, exponential, linear, white, bias, finite_dimensional, spline, Brownian, periodic_exponential, periodic_Matern32, periodic_Matern52, prod, prod_orthogonal, symmetric, coregionalise, rational_quadratic, fixed, rbfcos, independent_outputs
+from constructors import rbf, Matern32, Matern52, exponential, linear, white, bias, finite_dimensional, spline, Brownian, periodic_exponential, periodic_Matern32, periodic_Matern52, prod, symmetric, coregionalise, rational_quadratic, fixed, rbfcos, independent_outputs
 try:
    from constructors import rbf_sympy, sympykern # these depend on sympy
 except:
--- a/GPy/kern/constructors.py
+++ b/GPy/kern/constructors.py
@ -20,7 +20,6 @@ from periodic_exponential import periodic_exponential as periodic_exponentialpar
 from periodic_Matern32 import periodic_Matern32 as periodic_Matern32part
 from periodic_Matern52 import periodic_Matern52 as periodic_Matern52part
 from prod import prod as prodpart
-from prod_orthogonal import prod_orthogonal as prod_orthogonalpart
 from symmetric import symmetric as symmetric_part
 from coregionalise import coregionalise as coregionalise_part
 from rational_quadratic import rational_quadratic as rational_quadraticpart
@ -255,7 +254,7 @@ def periodic_Matern52(D,variance=1., lengthscale=None, period=2*np.pi,n_freq=10,
    part = periodic_Matern52part(D,variance, lengthscale, period, n_freq, lower, upper)
    return kern(D, [part])

-def prod(k1,k2):
+def prod(k1,k2,tensor=False):
    """
     Construct a product kernel over D from two kernels over D

@ -263,19 +262,8 @@ def prod(k1,k2):
    :type k1, k2: kernpart
    :rtype: kernel object
    """
-    part = prodpart(k1,k2)
-    return kern(k1.D, [part])
-
-def prod_orthogonal(k1,k2):
-    """
-     Construct a product kernel over D1 x D2 from a kernel over D1 and another over D2.
-
-    :param k1, k2: the kernels to multiply
-    :type k1, k2: kernpart
-    :rtype: kernel object
-    """
-    part = prod_orthogonalpart(k1,k2)
-    return kern(k1.D+k2.D, [part])
+    part = prodpart(k1,k2,tensor)
+    return kern(part.D, [part])

 def symmetric(k):
    """
--- a/GPy/kern/kern.py
+++ b/GPy/kern/kern.py
@ -7,7 +7,6 @@ import pylab as pb
 from ..core.parameterised import parameterised
 from kernpart import kernpart
 import itertools
-from prod_orthogonal import prod_orthogonal
 from prod import prod
 from ..util.linalg import symmetrify

@ -84,96 +83,72 @@ class kern(parameterised):
            count += p.Nparam

    def __add__(self, other):
-        assert self.D == other.D
-        newkern = kern(self.D, self.parts + other.parts, self.input_slices + other.input_slices)
-        # transfer constraints:
-        newkern.constrained_indices = self.constrained_indices + [i+self.Nparam  for i in other.constrained_indices]
-        newkern.constraints = self.constraints + other.constraints
-        newkern.fixed_indices = self.fixed_indices + [self.Nparam + x for x in other.fixed_indices]
-        newkern.fixed_values = self.fixed_values + other.fixed_values
-        newkern.tied_indices = self.tied_indices + [self.Nparam + x for x in other.tied_indices]
-        return newkern
+        """
+        Shortcut for `add`.
+        """
+        return self.add(other)

-    def add(self, other):
+    def add(self, other,tensor=False):
        """
        Add another kernel to this one. Both kernels are defined on the same _space_
        :param other: the other kernel to be added
        :type other: GPy.kern
        """
-        return self + other
+        if tensor:
+            D = self.D + other.D
+            self_input_slices = [slice(*sl.indices(self.D)) for sl in self.input_slices]
+            other_input_indices = [sl.indices(other.D) for sl in other.input_slices]
+            other_input_slices = [slice(i[0] + self.D, i[1] + self.D, i[2]) for i in other_input_indices]

-    def add_orthogonal(self, other):
-        """
-        Add another kernel to this one. Both kernels are defined on separate spaces
-        :param other: the other kernel to be added
-        :type other: GPy.kern
-        """
-        # deal with input slices
-        D = self.D + other.D
-        self_input_slices = [slice(*sl.indices(self.D)) for sl in self.input_slices]
-        other_input_indices = [sl.indices(other.D) for sl in other.input_slices]
-        other_input_slices = [slice(i[0] + self.D, i[1] + self.D, i[2]) for i in other_input_indices]
+            newkern = kern(D, self.parts + other.parts, self_input_slices + other_input_slices)

-        newkern = kern(D, self.parts + other.parts, self_input_slices + other_input_slices)
-
-        # transfer constraints:
-        newkern.constrained_indices = self.constrained_indices + [x+self.Nparam for x in other.constrained_indices]
-        newkern.constraints = self.constraints + other.constraints
-        newkern.fixed_indices = self.fixed_indices + [self.Nparam + x for x in other.fixed_indices]
-        newkern.fixed_values = self.fixed_values + other.fixed_values
-        newkern.constraints = self.constraints + other.constraints
-        newkern.constrained_bounded_uppers = self.constrained_bounded_uppers + other.constrained_bounded_uppers
-        newkern.tied_indices = self.tied_indices + [self.Nparam + x for x in other.tied_indices]
+            # transfer constraints:
+            newkern.constrained_indices = self.constrained_indices + [x+self.Nparam for x in other.constrained_indices]
+            newkern.constraints = self.constraints + other.constraints
+            newkern.fixed_indices = self.fixed_indices + [self.Nparam + x for x in other.fixed_indices]
+            newkern.fixed_values = self.fixed_values + other.fixed_values
+            newkern.constraints = self.constraints + other.constraints
+            newkern.tied_indices = self.tied_indices + [self.Nparam + x for x in other.tied_indices]
+        else:
+            assert self.D == other.D
+            newkern = kern(self.D, self.parts + other.parts, self.input_slices + other.input_slices)
+            # transfer constraints:
+            newkern.constrained_indices = self.constrained_indices + [i+self.Nparam  for i in other.constrained_indices]
+            newkern.constraints = self.constraints + other.constraints
+            newkern.fixed_indices = self.fixed_indices + [self.Nparam + x for x in other.fixed_indices]
+            newkern.fixed_values = self.fixed_values + other.fixed_values
+            newkern.tied_indices = self.tied_indices + [self.Nparam + x for x in other.tied_indices]
        return newkern

    def __mul__(self, other):
        """
-        Shortcut for `prod_orthogonal`. Note that `+` assumes that we sum 2 kernels defines on the same space whereas `*` assumes that the kernels are defined on different subspaces.
+        Shortcut for `prod`.
        """
        return self.prod(other)

-    def prod(self, other):
+    def prod(self, other,tensor=False):
        """
-        multiply two kernels defined on the same spaces.
+        multiply two kernels (either on the same space, or on the tensor product of the input space)
        :param other: the other kernel to be added
        :type other: GPy.kern
        """
        K1 = self.copy()
        K2 = other.copy()

-        newkernparts = [prod(k1, k2) for k1, k2 in itertools.product(K1.parts, K2.parts)]
-
        slices = []
-        for sl1, sl2 in itertools.product(K1.input_slices, K2.input_slices):
-            s1, s2 = [False] * K1.D, [False] * K2.D
+        for sl1, sl2 in itertools.product(K1.input_slices,K2.input_slices):
+            s1, s2 = [False]*K1.D, [False]*K2.D
            s1[sl1], s2[sl2] = [True], [True]
-            slices += [s1 + s2]
+            slices += [s1+s2]
+
+        newkernparts = [prod(k1, k2,tensor) for k1, k2 in itertools.product(K1.parts, K2.parts)]
+
+        if tensor:
+            newkern = kern(K1.D + K2.D, newkernparts, slices)
+        else:
+            newkern = kern(K1.D, newkernparts, slices)

-        newkern = kern(K1.D, newkernparts, slices)
        newkern._follow_constrains(K1, K2)
-
-        return newkern
-
-    def prod_orthogonal(self, other):
-        """
-        multiply two kernels. Both kernels are defined on separate spaces.
-        :param other: the other kernel to be added
-        :type other: GPy.kern
-        """
-        K1 = self.copy()
-        K2 = other.copy()
-
-        newkernparts = [prod_orthogonal(k1, k2) for k1, k2 in itertools.product(K1.parts, K2.parts)]
-
-        slices = []
-        for sl1, sl2 in itertools.product(K1.input_slices, K2.input_slices):
-            s1, s2 = [False] * K1.D, [False] * K2.D
-            s1[sl1], s2[sl2] = [True], [True]
-            slices += [s1 + s2]
-
-        newkern = kern(K1.D + K2.D, newkernparts, slices)
-        newkern._follow_constrains(K1, K2)
-
        return newkern

    def _follow_constrains(self, K1, K2):
@ -469,9 +444,9 @@ class kern(parameterised):

        return target_mu, target_S

-    def plot(self, x=None, plot_limits=None, which_functions='all', resolution=None, *args, **kwargs):
-        if which_functions == 'all':
-            which_functions = [True] * self.Nparts
+    def plot(self, x=None, plot_limits=None, which_parts='all', resolution=None, *args, **kwargs):
+        if which_parts == 'all':
+            which_parts = [True] * self.Nparts
        if self.D == 1:
            if x is None:
                x = np.zeros((1, 1))
@ -488,7 +463,7 @@ class kern(parameterised):
                raise ValueError, "Bad limits for plotting"

            Xnew = np.linspace(xmin, xmax, resolution or 201)[:, None]
-            Kx = self.K(Xnew, x, slices2=which_functions)
+            Kx = self.K(Xnew, x, which_parts)
            pb.plot(Xnew, Kx, *args, **kwargs)
            pb.xlim(xmin, xmax)
            pb.xlabel("x")
@ -514,7 +489,7 @@ class kern(parameterised):
            xg = np.linspace(xmin[0], xmax[0], resolution)
            yg = np.linspace(xmin[1], xmax[1], resolution)
            Xnew = np.vstack((xx.flatten(), yy.flatten())).T
-            Kx = self.K(Xnew, x, slices2=which_functions)
+            Kx = self.K(Xnew, x, which_parts)
            Kx = Kx.reshape(resolution, resolution).T
            pb.contour(xg, yg, Kx, vmin=Kx.min(), vmax=Kx.max(), cmap=pb.cm.jet, *args, **kwargs)
            pb.xlim(xmin[0], xmax[0])
--- a/GPy/kern/prod.py
+++ b/GPy/kern/prod.py
@ -4,108 +4,108 @@
 from kernpart import kernpart
 import numpy as np
 import hashlib
-#from scipy import integrate # This may not be necessary (Nicolas, 20th Feb)

 class prod(kernpart):
    """
-    Computes the product of 2 kernels that are defined on the same space
+    Computes the product of 2 kernels

    :param k1, k2: the kernels to multiply
    :type k1, k2: kernpart
+    :param tensor: The kernels are either multiply as functions defined on the same input space (default) or on the product of the input spaces
+    :type tensor: Boolean
    :rtype: kernel object

    """
-    def __init__(self,k1,k2):
-        assert k1.D == k2.D, "Error: The input spaces of the kernels to multiply must have the same dimension"
-        self.D = k1.D
+    def __init__(self,k1,k2,tensor=False):
        self.Nparam = k1.Nparam + k2.Nparam
        self.name = k1.name + '<times>' + k2.name
        self.k1 = k1
        self.k2 = k2
+        if tensor:
+            self.D = k1.D + k2.D
+            self.slice1 = slice(0,self.k1.D)
+            self.slice2 = slice(self.k1.D,self.k1.D+self.k2.D)
+        else:
+            assert k1.D == k2.D, "Error: The input spaces of the kernels to sum don't have the same dimension."
+            self.D = k1.D
+            self.slice1 = slice(0,self.D)
+            self.slice2 = slice(0,self.D)
+
+        self._X, self._X2, self._params = np.empty(shape=(3,1))
        self._set_params(np.hstack((k1._get_params(),k2._get_params())))

    def _get_params(self):
        """return the value of the parameters."""
-        return self.params
+        return np.hstack((self.k1._get_params(), self.k2._get_params()))

    def _set_params(self,x):
        """set the value of the parameters."""
        self.k1._set_params(x[:self.k1.Nparam])
        self.k2._set_params(x[self.k1.Nparam:])
-        self.params = x

    def _get_param_names(self):
        """return parameter names."""
        return [self.k1.name + '_' + param_name for param_name in self.k1._get_param_names()] + [self.k2.name + '_' + param_name for param_name in self.k2._get_param_names()]

    def K(self,X,X2,target):
-        """Compute the covariance matrix between X and X2."""
-        if X2 is None:
-            target1 = np.zeros((X.shape[0],X2.shape[0]))
-            target2 = np.zeros((X.shape[0],X2.shape[0]))
-        else:
-            target1 = np.zeros((X.shape[0],X.shape[0]))
-            target2 = np.zeros((X.shape[0],X.shape[0]))
-        self.k1.K(X,X2,target1)
-        self.k2.K(X,X2,target2)
-        target += target1 * target2
-
-    def Kdiag(self,X,target):
-        """Compute the diagonal of the covariance matrix associated to X."""
-        target1 = np.zeros((X.shape[0],))
-        target2 = np.zeros((X.shape[0],))
-        self.k1.Kdiag(X,target1)
-        self.k2.Kdiag(X,target2)
-        target += target1 * target2
+        self._K_computations(X,X2)
+        target += self._K1 * self._K2

    def dK_dtheta(self,dL_dK,X,X2,target):
        """derivative of the covariance matrix with respect to the parameters."""
-        if X2 is None: X2 = X
-        K1 = np.zeros((X.shape[0],X2.shape[0]))
-        K2 = np.zeros((X.shape[0],X2.shape[0]))
-        self.k1.K(X,X2,K1)
-        self.k2.K(X,X2,K2)
+        self._K_computations(X,X2)
+        if X2 is None:
+            self.k1.dK_dtheta(dL_dK*self._K2, X[:,self.slice1], None, target[:self.k1.Nparam])
+            self.k2.dK_dtheta(dL_dK*self._K1, X[:,self.slice2], None, target[self.k1.Nparam:])
+        else:
+            self.k1.dK_dtheta(dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target[:self.k1.Nparam])
+            self.k2.dK_dtheta(dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target[self.k1.Nparam:])

-        k1_target = np.zeros(self.k1.Nparam)
-        k2_target = np.zeros(self.k2.Nparam)
-        self.k1.dK_dtheta(dL_dK*K2, X, X2, k1_target)
-        self.k2.dK_dtheta(dL_dK*K1, X, X2, k2_target)
+    def Kdiag(self,X,target):
+        """Compute the diagonal of the covariance matrix associated to X."""
+        target1 = np.zeros(X.shape[0])
+        target2 = np.zeros(X.shape[0])
+        self.k1.Kdiag(X[:,self.slice1],target1)
+        self.k2.Kdiag(X[:,self.slice2],target2)
+        target += target1 * target2

-        target[:self.k1.Nparam] += k1_target
-        target[self.k1.Nparam:] += k2_target
+    def dKdiag_dtheta(self,dL_dKdiag,X,target):
+        K1 = np.zeros(X.shape[0])
+        K2 = np.zeros(X.shape[0])
+        self.k1.Kdiag(X[:,self.slice1],K1)
+        self.k2.Kdiag(X[:,self.slice2],K2)
+        self.k1.dKdiag_dtheta(dL_dKdiag*K2,X[:,self.slice1],target[:self.k1.Nparam])
+        self.k2.dKdiag_dtheta(dL_dKdiag*K1,X[:,self.slice2],target[self.k1.Nparam:])

    def dK_dX(self,dL_dK,X,X2,target):
        """derivative of the covariance matrix with respect to X."""
-        if X2 is None: X2 = X
-        K1 = np.zeros((X.shape[0],X2.shape[0]))
-        K2 = np.zeros((X.shape[0],X2.shape[0]))
-        self.k1.K(X,X2,K1)
-        self.k2.K(X,X2,K2)
+        self._K_computations(X,X2)
+        self.k1.dK_dX(dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target)
+        self.k2.dK_dX(dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target)

-        self.k1.dK_dX(dL_dK*K2, X, X2, target)
-        self.k2.dK_dX(dL_dK*K1, X, X2, target)
+    def dKdiag_dX(self, dL_dKdiag, X, target):
+        K1 = np.zeros(X.shape[0])
+        K2 = np.zeros(X.shape[0])
+        self.k1.Kdiag(X[:,self.slice1],K1)
+        self.k2.Kdiag(X[:,self.slice2],K2)

-    def dKdiag_dX(self,dL_dKdiag,X,target):
-        target1 = np.zeros((X.shape[0],))
-        target2 = np.zeros((X.shape[0],))
-        self.k1.Kdiag(X,target1)
-        self.k2.Kdiag(X,target2)
+        self.k1.dK_dX(dL_dKdiag*K2, X[:,self.slice1], target)
+        self.k2.dK_dX(dL_dKdiag*K1, X[:,self.slice2], target)

-        self.k1.dKdiag_dX(dL_dKdiag*target2, X, target)
-        self.k2.dKdiag_dX(dL_dKdiag*target1, X, target)
-
-    def dKdiag_dtheta(self,dL_dKdiag,X,target):
-        """Compute the diagonal of the covariance matrix associated to X."""
-        target1 = np.zeros((X.shape[0],))
-        target2 = np.zeros((X.shape[0],))
-        self.k1.Kdiag(X,target1)
-        self.k2.Kdiag(X,target2)
-
-        k1_target = np.zeros(self.k1.Nparam)
-        k2_target = np.zeros(self.k2.Nparam)
-        self.k1.dKdiag_dtheta(dL_dKdiag*target2, X, k1_target)
-        self.k2.dKdiag_dtheta(dL_dKdiag*target1, X, k2_target)
-
-        target[:self.k1.Nparam] += k1_target
-        target[self.k1.Nparam:] += k2_target
+    def _K_computations(self,X,X2):
+        if not (np.array_equal(X,self._X) and np.array_equal(X2,self._X2) and np.array_equal(self._params , self._get_params())):
+            self._X = X.copy()
+            self._params == self._get_params().copy()
+            if X2 is None:
+                self._X2 = None
+                self._K1 = np.zeros((X.shape[0],X.shape[0]))
+                self._K2 = np.zeros((X.shape[0],X.shape[0]))
+                self.k1.K(X[:,self.slice1],None,self._K1)
+                self.k2.K(X[:,self.slice2],None,self._K2)
+            else:
+                self._X2 = X2.copy()
+                self._K1 = np.zeros((X.shape[0],X2.shape[0]))
+                self._K2 = np.zeros((X.shape[0],X2.shape[0]))
+                self.k1.K(X[:,self.slice1],X2[:,self.slice1],self._K1)
+                self.k2.K(X[:,self.slice2],X2[:,self.slice2],self._K2)