unified framework for addition and product of kernels, with a tensor flag (boolean) instead of and

This commit is contained in:
Nicolas 2013-05-10 17:48:11 +01:00
parent 50b7958051
commit 652b3ce2c6
8 changed files with 132 additions and 168 deletions

View file

@ -2,7 +2,7 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from constructors import rbf, Matern32, Matern52, exponential, linear, white, bias, finite_dimensional, spline, Brownian, periodic_exponential, periodic_Matern32, periodic_Matern52, prod, prod_orthogonal, symmetric, coregionalise, rational_quadratic, fixed, rbfcos, independent_outputs
from constructors import rbf, Matern32, Matern52, exponential, linear, white, bias, finite_dimensional, spline, Brownian, periodic_exponential, periodic_Matern32, periodic_Matern52, prod, symmetric, coregionalise, rational_quadratic, fixed, rbfcos, independent_outputs
try:
from constructors import rbf_sympy, sympykern # these depend on sympy
except:

View file

@ -20,7 +20,6 @@ from periodic_exponential import periodic_exponential as periodic_exponentialpar
from periodic_Matern32 import periodic_Matern32 as periodic_Matern32part
from periodic_Matern52 import periodic_Matern52 as periodic_Matern52part
from prod import prod as prodpart
from prod_orthogonal import prod_orthogonal as prod_orthogonalpart
from symmetric import symmetric as symmetric_part
from coregionalise import coregionalise as coregionalise_part
from rational_quadratic import rational_quadratic as rational_quadraticpart
@ -255,7 +254,7 @@ def periodic_Matern52(D,variance=1., lengthscale=None, period=2*np.pi,n_freq=10,
part = periodic_Matern52part(D,variance, lengthscale, period, n_freq, lower, upper)
return kern(D, [part])
def prod(k1,k2):
def prod(k1,k2,tensor=False):
"""
Construct a product kernel over D from two kernels over D
@ -263,19 +262,8 @@ def prod(k1,k2):
:type k1, k2: kernpart
:rtype: kernel object
"""
part = prodpart(k1,k2)
return kern(k1.D, [part])
def prod_orthogonal(k1,k2):
"""
Construct a product kernel over D1 x D2 from a kernel over D1 and another over D2.
:param k1, k2: the kernels to multiply
:type k1, k2: kernpart
:rtype: kernel object
"""
part = prod_orthogonalpart(k1,k2)
return kern(k1.D+k2.D, [part])
part = prodpart(k1,k2,tensor)
return kern(part.D, [part])
def symmetric(k):
"""

View file

@ -7,7 +7,6 @@ import pylab as pb
from ..core.parameterised import parameterised
from kernpart import kernpart
import itertools
from prod_orthogonal import prod_orthogonal
from prod import prod
from ..util.linalg import symmetrify
@ -84,96 +83,72 @@ class kern(parameterised):
count += p.Nparam
def __add__(self, other):
assert self.D == other.D
newkern = kern(self.D, self.parts + other.parts, self.input_slices + other.input_slices)
# transfer constraints:
newkern.constrained_indices = self.constrained_indices + [i+self.Nparam for i in other.constrained_indices]
newkern.constraints = self.constraints + other.constraints
newkern.fixed_indices = self.fixed_indices + [self.Nparam + x for x in other.fixed_indices]
newkern.fixed_values = self.fixed_values + other.fixed_values
newkern.tied_indices = self.tied_indices + [self.Nparam + x for x in other.tied_indices]
return newkern
"""
Shortcut for `add`.
"""
return self.add(other)
def add(self, other):
def add(self, other,tensor=False):
"""
Add another kernel to this one. Both kernels are defined on the same _space_
:param other: the other kernel to be added
:type other: GPy.kern
"""
return self + other
if tensor:
D = self.D + other.D
self_input_slices = [slice(*sl.indices(self.D)) for sl in self.input_slices]
other_input_indices = [sl.indices(other.D) for sl in other.input_slices]
other_input_slices = [slice(i[0] + self.D, i[1] + self.D, i[2]) for i in other_input_indices]
def add_orthogonal(self, other):
"""
Add another kernel to this one. Both kernels are defined on separate spaces
:param other: the other kernel to be added
:type other: GPy.kern
"""
# deal with input slices
D = self.D + other.D
self_input_slices = [slice(*sl.indices(self.D)) for sl in self.input_slices]
other_input_indices = [sl.indices(other.D) for sl in other.input_slices]
other_input_slices = [slice(i[0] + self.D, i[1] + self.D, i[2]) for i in other_input_indices]
newkern = kern(D, self.parts + other.parts, self_input_slices + other_input_slices)
newkern = kern(D, self.parts + other.parts, self_input_slices + other_input_slices)
# transfer constraints:
newkern.constrained_indices = self.constrained_indices + [x+self.Nparam for x in other.constrained_indices]
newkern.constraints = self.constraints + other.constraints
newkern.fixed_indices = self.fixed_indices + [self.Nparam + x for x in other.fixed_indices]
newkern.fixed_values = self.fixed_values + other.fixed_values
newkern.constraints = self.constraints + other.constraints
newkern.constrained_bounded_uppers = self.constrained_bounded_uppers + other.constrained_bounded_uppers
newkern.tied_indices = self.tied_indices + [self.Nparam + x for x in other.tied_indices]
# transfer constraints:
newkern.constrained_indices = self.constrained_indices + [x+self.Nparam for x in other.constrained_indices]
newkern.constraints = self.constraints + other.constraints
newkern.fixed_indices = self.fixed_indices + [self.Nparam + x for x in other.fixed_indices]
newkern.fixed_values = self.fixed_values + other.fixed_values
newkern.constraints = self.constraints + other.constraints
newkern.tied_indices = self.tied_indices + [self.Nparam + x for x in other.tied_indices]
else:
assert self.D == other.D
newkern = kern(self.D, self.parts + other.parts, self.input_slices + other.input_slices)
# transfer constraints:
newkern.constrained_indices = self.constrained_indices + [i+self.Nparam for i in other.constrained_indices]
newkern.constraints = self.constraints + other.constraints
newkern.fixed_indices = self.fixed_indices + [self.Nparam + x for x in other.fixed_indices]
newkern.fixed_values = self.fixed_values + other.fixed_values
newkern.tied_indices = self.tied_indices + [self.Nparam + x for x in other.tied_indices]
return newkern
def __mul__(self, other):
"""
Shortcut for `prod_orthogonal`. Note that `+` assumes that we sum 2 kernels defines on the same space whereas `*` assumes that the kernels are defined on different subspaces.
Shortcut for `prod`.
"""
return self.prod(other)
def prod(self, other):
def prod(self, other,tensor=False):
"""
multiply two kernels defined on the same spaces.
multiply two kernels (either on the same space, or on the tensor product of the input space)
:param other: the other kernel to be added
:type other: GPy.kern
"""
K1 = self.copy()
K2 = other.copy()
newkernparts = [prod(k1, k2) for k1, k2 in itertools.product(K1.parts, K2.parts)]
slices = []
for sl1, sl2 in itertools.product(K1.input_slices, K2.input_slices):
s1, s2 = [False] * K1.D, [False] * K2.D
for sl1, sl2 in itertools.product(K1.input_slices,K2.input_slices):
s1, s2 = [False]*K1.D, [False]*K2.D
s1[sl1], s2[sl2] = [True], [True]
slices += [s1 + s2]
slices += [s1+s2]
newkernparts = [prod(k1, k2,tensor) for k1, k2 in itertools.product(K1.parts, K2.parts)]
if tensor:
newkern = kern(K1.D + K2.D, newkernparts, slices)
else:
newkern = kern(K1.D, newkernparts, slices)
newkern = kern(K1.D, newkernparts, slices)
newkern._follow_constrains(K1, K2)
return newkern
def prod_orthogonal(self, other):
"""
multiply two kernels. Both kernels are defined on separate spaces.
:param other: the other kernel to be added
:type other: GPy.kern
"""
K1 = self.copy()
K2 = other.copy()
newkernparts = [prod_orthogonal(k1, k2) for k1, k2 in itertools.product(K1.parts, K2.parts)]
slices = []
for sl1, sl2 in itertools.product(K1.input_slices, K2.input_slices):
s1, s2 = [False] * K1.D, [False] * K2.D
s1[sl1], s2[sl2] = [True], [True]
slices += [s1 + s2]
newkern = kern(K1.D + K2.D, newkernparts, slices)
newkern._follow_constrains(K1, K2)
return newkern
def _follow_constrains(self, K1, K2):
@ -469,9 +444,9 @@ class kern(parameterised):
return target_mu, target_S
def plot(self, x=None, plot_limits=None, which_functions='all', resolution=None, *args, **kwargs):
if which_functions == 'all':
which_functions = [True] * self.Nparts
def plot(self, x=None, plot_limits=None, which_parts='all', resolution=None, *args, **kwargs):
if which_parts == 'all':
which_parts = [True] * self.Nparts
if self.D == 1:
if x is None:
x = np.zeros((1, 1))
@ -488,7 +463,7 @@ class kern(parameterised):
raise ValueError, "Bad limits for plotting"
Xnew = np.linspace(xmin, xmax, resolution or 201)[:, None]
Kx = self.K(Xnew, x, slices2=which_functions)
Kx = self.K(Xnew, x, which_parts)
pb.plot(Xnew, Kx, *args, **kwargs)
pb.xlim(xmin, xmax)
pb.xlabel("x")
@ -514,7 +489,7 @@ class kern(parameterised):
xg = np.linspace(xmin[0], xmax[0], resolution)
yg = np.linspace(xmin[1], xmax[1], resolution)
Xnew = np.vstack((xx.flatten(), yy.flatten())).T
Kx = self.K(Xnew, x, slices2=which_functions)
Kx = self.K(Xnew, x, which_parts)
Kx = Kx.reshape(resolution, resolution).T
pb.contour(xg, yg, Kx, vmin=Kx.min(), vmax=Kx.max(), cmap=pb.cm.jet, *args, **kwargs)
pb.xlim(xmin[0], xmax[0])

View file

@ -4,108 +4,108 @@
from kernpart import kernpart
import numpy as np
import hashlib
#from scipy import integrate # This may not be necessary (Nicolas, 20th Feb)
class prod(kernpart):
"""
Computes the product of 2 kernels that are defined on the same space
Computes the product of 2 kernels
:param k1, k2: the kernels to multiply
:type k1, k2: kernpart
:param tensor: The kernels are either multiply as functions defined on the same input space (default) or on the product of the input spaces
:type tensor: Boolean
:rtype: kernel object
"""
def __init__(self,k1,k2):
assert k1.D == k2.D, "Error: The input spaces of the kernels to multiply must have the same dimension"
self.D = k1.D
def __init__(self,k1,k2,tensor=False):
self.Nparam = k1.Nparam + k2.Nparam
self.name = k1.name + '<times>' + k2.name
self.k1 = k1
self.k2 = k2
if tensor:
self.D = k1.D + k2.D
self.slice1 = slice(0,self.k1.D)
self.slice2 = slice(self.k1.D,self.k1.D+self.k2.D)
else:
assert k1.D == k2.D, "Error: The input spaces of the kernels to sum don't have the same dimension."
self.D = k1.D
self.slice1 = slice(0,self.D)
self.slice2 = slice(0,self.D)
self._X, self._X2, self._params = np.empty(shape=(3,1))
self._set_params(np.hstack((k1._get_params(),k2._get_params())))
def _get_params(self):
"""return the value of the parameters."""
return self.params
return np.hstack((self.k1._get_params(), self.k2._get_params()))
def _set_params(self,x):
"""set the value of the parameters."""
self.k1._set_params(x[:self.k1.Nparam])
self.k2._set_params(x[self.k1.Nparam:])
self.params = x
def _get_param_names(self):
"""return parameter names."""
return [self.k1.name + '_' + param_name for param_name in self.k1._get_param_names()] + [self.k2.name + '_' + param_name for param_name in self.k2._get_param_names()]
def K(self,X,X2,target):
"""Compute the covariance matrix between X and X2."""
if X2 is None:
target1 = np.zeros((X.shape[0],X2.shape[0]))
target2 = np.zeros((X.shape[0],X2.shape[0]))
else:
target1 = np.zeros((X.shape[0],X.shape[0]))
target2 = np.zeros((X.shape[0],X.shape[0]))
self.k1.K(X,X2,target1)
self.k2.K(X,X2,target2)
target += target1 * target2
def Kdiag(self,X,target):
"""Compute the diagonal of the covariance matrix associated to X."""
target1 = np.zeros((X.shape[0],))
target2 = np.zeros((X.shape[0],))
self.k1.Kdiag(X,target1)
self.k2.Kdiag(X,target2)
target += target1 * target2
self._K_computations(X,X2)
target += self._K1 * self._K2
def dK_dtheta(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to the parameters."""
if X2 is None: X2 = X
K1 = np.zeros((X.shape[0],X2.shape[0]))
K2 = np.zeros((X.shape[0],X2.shape[0]))
self.k1.K(X,X2,K1)
self.k2.K(X,X2,K2)
self._K_computations(X,X2)
if X2 is None:
self.k1.dK_dtheta(dL_dK*self._K2, X[:,self.slice1], None, target[:self.k1.Nparam])
self.k2.dK_dtheta(dL_dK*self._K1, X[:,self.slice2], None, target[self.k1.Nparam:])
else:
self.k1.dK_dtheta(dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target[:self.k1.Nparam])
self.k2.dK_dtheta(dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target[self.k1.Nparam:])
k1_target = np.zeros(self.k1.Nparam)
k2_target = np.zeros(self.k2.Nparam)
self.k1.dK_dtheta(dL_dK*K2, X, X2, k1_target)
self.k2.dK_dtheta(dL_dK*K1, X, X2, k2_target)
def Kdiag(self,X,target):
"""Compute the diagonal of the covariance matrix associated to X."""
target1 = np.zeros(X.shape[0])
target2 = np.zeros(X.shape[0])
self.k1.Kdiag(X[:,self.slice1],target1)
self.k2.Kdiag(X[:,self.slice2],target2)
target += target1 * target2
target[:self.k1.Nparam] += k1_target
target[self.k1.Nparam:] += k2_target
def dKdiag_dtheta(self,dL_dKdiag,X,target):
K1 = np.zeros(X.shape[0])
K2 = np.zeros(X.shape[0])
self.k1.Kdiag(X[:,self.slice1],K1)
self.k2.Kdiag(X[:,self.slice2],K2)
self.k1.dKdiag_dtheta(dL_dKdiag*K2,X[:,self.slice1],target[:self.k1.Nparam])
self.k2.dKdiag_dtheta(dL_dKdiag*K1,X[:,self.slice2],target[self.k1.Nparam:])
def dK_dX(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to X."""
if X2 is None: X2 = X
K1 = np.zeros((X.shape[0],X2.shape[0]))
K2 = np.zeros((X.shape[0],X2.shape[0]))
self.k1.K(X,X2,K1)
self.k2.K(X,X2,K2)
self._K_computations(X,X2)
self.k1.dK_dX(dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target)
self.k2.dK_dX(dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target)
self.k1.dK_dX(dL_dK*K2, X, X2, target)
self.k2.dK_dX(dL_dK*K1, X, X2, target)
def dKdiag_dX(self, dL_dKdiag, X, target):
K1 = np.zeros(X.shape[0])
K2 = np.zeros(X.shape[0])
self.k1.Kdiag(X[:,self.slice1],K1)
self.k2.Kdiag(X[:,self.slice2],K2)
def dKdiag_dX(self,dL_dKdiag,X,target):
target1 = np.zeros((X.shape[0],))
target2 = np.zeros((X.shape[0],))
self.k1.Kdiag(X,target1)
self.k2.Kdiag(X,target2)
self.k1.dK_dX(dL_dKdiag*K2, X[:,self.slice1], target)
self.k2.dK_dX(dL_dKdiag*K1, X[:,self.slice2], target)
self.k1.dKdiag_dX(dL_dKdiag*target2, X, target)
self.k2.dKdiag_dX(dL_dKdiag*target1, X, target)
def dKdiag_dtheta(self,dL_dKdiag,X,target):
"""Compute the diagonal of the covariance matrix associated to X."""
target1 = np.zeros((X.shape[0],))
target2 = np.zeros((X.shape[0],))
self.k1.Kdiag(X,target1)
self.k2.Kdiag(X,target2)
k1_target = np.zeros(self.k1.Nparam)
k2_target = np.zeros(self.k2.Nparam)
self.k1.dKdiag_dtheta(dL_dKdiag*target2, X, k1_target)
self.k2.dKdiag_dtheta(dL_dKdiag*target1, X, k2_target)
target[:self.k1.Nparam] += k1_target
target[self.k1.Nparam:] += k2_target
def _K_computations(self,X,X2):
if not (np.array_equal(X,self._X) and np.array_equal(X2,self._X2) and np.array_equal(self._params , self._get_params())):
self._X = X.copy()
self._params == self._get_params().copy()
if X2 is None:
self._X2 = None
self._K1 = np.zeros((X.shape[0],X.shape[0]))
self._K2 = np.zeros((X.shape[0],X.shape[0]))
self.k1.K(X[:,self.slice1],None,self._K1)
self.k2.K(X[:,self.slice2],None,self._K2)
else:
self._X2 = X2.copy()
self._K1 = np.zeros((X.shape[0],X2.shape[0]))
self._K2 = np.zeros((X.shape[0],X2.shape[0]))
self.k1.K(X[:,self.slice1],X2[:,self.slice1],self._K1)
self.k2.K(X[:,self.slice2],X2[:,self.slice2],self._K2)