mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-05-21 14:05:14 +02:00
slicing support for kernel input dimension
This commit is contained in:
parent
5f3524e7da
commit
db5fd17609
10 changed files with 178 additions and 65 deletions
|
|
@ -1,12 +1,10 @@
|
|||
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
import sys
|
||||
import numpy as np
|
||||
import itertools
|
||||
from linear import Linear
|
||||
from ...core.parameterization import Parameterized
|
||||
from ...core.parameterization.param import Param
|
||||
from ...util.caching import Cache_this
|
||||
from kern import Kern
|
||||
|
||||
class Add(Kern):
|
||||
|
|
@ -14,19 +12,24 @@ class Add(Kern):
|
|||
assert all([isinstance(k, Kern) for k in subkerns])
|
||||
if tensor:
|
||||
input_dim = sum([k.input_dim for k in subkerns])
|
||||
self.input_slices = []
|
||||
self.self.active_dims = []
|
||||
n = 0
|
||||
for k in subkerns:
|
||||
self.input_slices.append(slice(n, n+k.input_dim))
|
||||
self.self.active_dims.append(slice(n, n+k.input_dim))
|
||||
n += k.input_dim
|
||||
else:
|
||||
assert all([k.input_dim == subkerns[0].input_dim for k in subkerns])
|
||||
input_dim = subkerns[0].input_dim
|
||||
self.input_slices = [slice(None) for k in subkerns]
|
||||
#assert all([k.input_dim == subkerns[0].input_dim for k in subkerns])
|
||||
#input_dim = subkerns[0].input_dim
|
||||
#self.input_slices = [slice(None) for k in subkerns]
|
||||
input_dim = reduce(np.union1d, map(lambda x: np.r_[x.active_dims], subkerns))
|
||||
super(Add, self).__init__(input_dim, 'add')
|
||||
self.add_parameters(*subkerns)
|
||||
|
||||
|
||||
|
||||
@property
|
||||
def parts(self):
|
||||
return self._parameters_
|
||||
|
||||
@Cache_this(limit=1, force_kwargs=('which_parts',))
|
||||
def K(self, X, X2=None):
|
||||
"""
|
||||
Compute the kernel function.
|
||||
|
|
@ -37,13 +40,19 @@ class Add(Kern):
|
|||
handles this as X2 == X.
|
||||
"""
|
||||
assert X.shape[1] == self.input_dim
|
||||
if X2 is None:
|
||||
return sum([p.K(X[:, i_s], None) for p, i_s in zip(self._parameters_, self.input_slices)])
|
||||
else:
|
||||
return sum([p.K(X[:, i_s], X2[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)])
|
||||
which_parts=None
|
||||
if which_parts is None:
|
||||
which_parts = self.parts
|
||||
elif not isinstance(which_parts, (list, tuple)):
|
||||
# if only one part is given
|
||||
which_parts = [which_parts]
|
||||
return sum([p.K(X, X2) for p in which_parts])
|
||||
|
||||
def update_gradients_full(self, dL_dK, X):
|
||||
[p.update_gradients_full(dL_dK, X[:,i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
|
||||
def update_gradients_full(self, dL_dK, X, X2=None):
|
||||
[p.update_gradients_full(dL_dK, X, X2) for p in self.parts]
|
||||
|
||||
def update_gradients_diag(self, dL_dK, X):
|
||||
[p.update_gradients_diag(dL_dK, X) for p in self.parts]
|
||||
|
||||
def gradients_X(self, dL_dK, X, X2=None):
|
||||
"""Compute the gradient of the objective function with respect to X.
|
||||
|
|
@ -55,16 +64,17 @@ class Add(Kern):
|
|||
:param X2: Observed data inputs (optional, defaults to X)
|
||||
:type X2: np.ndarray (num_inducing x input_dim)"""
|
||||
|
||||
target = np.zeros_like(X)
|
||||
if X2 is None:
|
||||
[np.add(target[:,i_s], p.gradients_X(dL_dK, X[:, i_s], None), target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
|
||||
else:
|
||||
[np.add(target[:,i_s], p.gradients_X(dL_dK, X[:, i_s], X2[:,i_s]), target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
|
||||
target = np.zeros(X.shape)
|
||||
for p in self.parts:
|
||||
target[:, p.active_dims] += p.gradients_X(dL_dK, X, X2)
|
||||
return target
|
||||
|
||||
def Kdiag(self, X):
|
||||
which_parts=None
|
||||
assert X.shape[1] == self.input_dim
|
||||
return sum([p.Kdiag(X[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)])
|
||||
if which_parts is None:
|
||||
which_parts = self.parts
|
||||
return sum([p.Kdiag(X) for p in which_parts])
|
||||
|
||||
|
||||
def psi0(self, Z, variational_posterior):
|
||||
|
|
|
|||
|
|
@ -2,13 +2,22 @@
|
|||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
import sys
|
||||
import numpy as np
|
||||
import itertools
|
||||
from ...core.parameterization import Parameterized
|
||||
from ...core.parameterization.param import Param
|
||||
|
||||
from ...core.parameterization.parameterized import ParametersChangedMeta, Parameterized
|
||||
from ...util.caching import Cache_this
|
||||
|
||||
class KernCallsViaSlicerMeta(ParametersChangedMeta):
    """
    Metaclass that routes a kernel instance's input-taking methods through
    the instance's own ``_slice_wrapper``.

    The instance is first built by the parent metaclass's ``__call__``;
    afterwards ``K``, ``Kdiag``, ``update_gradients_full``,
    ``update_gradients_diag``, ``gradients_X`` and ``gradients_X_diag`` are
    replaced on the instance by whatever ``_slice_wrapper`` returns for them.
    """
    def __call__(self, *args, **kw):
        instance = super(KernCallsViaSlicerMeta, self).__call__(*args, **kw)
        # (method name, extra positional switches handed to _slice_wrapper),
        # listed in the order in which the methods get wrapped.
        wrapping_plan = (
            ('K', ()),
            ('Kdiag', (True,)),
            ('update_gradients_full', (False, True)),
            ('update_gradients_diag', (True, True)),
            ('gradients_X', (False, True)),
            ('gradients_X_diag', (True, True)),
        )
        for method_name, switches in wrapping_plan:
            wrapped = instance._slice_wrapper(getattr(instance, method_name), *switches)
            setattr(instance, method_name, wrapped)
        return instance
|
||||
|
||||
class Kern(Parameterized):
|
||||
__metaclass__ = KernCallsViaSlicerMeta
|
||||
def __init__(self, input_dim, name, *a, **kw):
|
||||
"""
|
||||
The base class for a kernel: a positive definite function
|
||||
|
|
@ -20,11 +29,83 @@ class Kern(Parameterized):
|
|||
Do not instantiate.
|
||||
"""
|
||||
super(Kern, self).__init__(name=name, *a, **kw)
|
||||
self.input_dim = input_dim
|
||||
|
||||
if isinstance(input_dim, int):
|
||||
self.active_dims = slice(0, input_dim)
|
||||
self.input_dim = input_dim
|
||||
else:
|
||||
self.active_dims = input_dim
|
||||
self.input_dim = len(self.active_dims)
|
||||
self._sliced_X = False
|
||||
self._sliced_X2 = False
|
||||
|
||||
@Cache_this(limit=10, ignore_args = (0,))
def _slice_X(self, X):
    """Return X indexed along its second axis by ``self.active_dims``."""
    selected = self.active_dims
    return X[:, selected]
|
||||
|
||||
def _slice_wrapper(self, operation, diag=False, derivative=False):
|
||||
"""
|
||||
This method wraps the functions in kernel to make sure all kernels allways see their respective input dimension.
|
||||
The different switches are:
|
||||
diag: if X2 exists
|
||||
derivative: if firest arg is dL_dK
|
||||
"""
|
||||
if derivative:
|
||||
if diag:
|
||||
def x_slice_wrapper(dL_dK, X, *args, **kw):
|
||||
X = self._slice_X(X) if not self._sliced_X else X
|
||||
self._sliced_X = True
|
||||
try:
|
||||
ret = operation(dL_dK, X, *args, **kw)
|
||||
except: raise
|
||||
finally:
|
||||
self._sliced_X = False
|
||||
return ret
|
||||
else:
|
||||
def x_slice_wrapper(dL_dK, X, X2=None, *args, **kw):
|
||||
X, X2 = self._slice_X(X) if not self._sliced_X else X, self._slice_X(X2) if X2 is not None and not self._sliced_X2 else X2
|
||||
self._sliced_X = True
|
||||
self._sliced_X2 = True
|
||||
try:
|
||||
ret = operation(dL_dK, X, X2, *args, **kw)
|
||||
except: raise
|
||||
finally:
|
||||
self._sliced_X = False
|
||||
self._sliced_X2 = False
|
||||
return ret
|
||||
else:
|
||||
if diag:
|
||||
def x_slice_wrapper(X, *args, **kw):
|
||||
X = self._slice_X(X) if not self._sliced_X else X
|
||||
self._sliced_X = True
|
||||
try:
|
||||
ret = operation(X, *args, **kw)
|
||||
except: raise
|
||||
finally:
|
||||
self._sliced_X = False
|
||||
return ret
|
||||
else:
|
||||
def x_slice_wrapper(X, X2=None, *args, **kw):
|
||||
X, X2 = self._slice_X(X) if not self._sliced_X else X, self._slice_X(X2) if X2 is not None and not self._sliced_X2 else X2
|
||||
self._sliced_X = True
|
||||
self._sliced_X2 = True
|
||||
try:
|
||||
ret = operation(X, X2, *args, **kw)
|
||||
except: raise
|
||||
finally:
|
||||
self._sliced_X = False
|
||||
self._sliced_X2 = False
|
||||
return ret
|
||||
x_slice_wrapper._operation = operation
|
||||
x_slice_wrapper.__name__ = ("slicer("+operation.__name__
|
||||
+(","+str(bool(diag)) if diag else'')
|
||||
+(','+str(bool(derivative)) if derivative else '')
|
||||
+')')
|
||||
x_slice_wrapper.__doc__ = "**sliced**\n\n" + (operation.__doc__ or "")
|
||||
return x_slice_wrapper
|
||||
|
||||
def K(self, X, X2):
|
||||
raise NotImplementedError
|
||||
def Kdiag(self, Xa):
|
||||
def Kdiag(self, X):
|
||||
raise NotImplementedError
|
||||
def psi0(self, Z, variational_posterior):
|
||||
raise NotImplementedError
|
||||
|
|
@ -34,13 +115,16 @@ class Kern(Parameterized):
|
|||
raise NotImplementedError
|
||||
def gradients_X(self, dL_dK, X, X2):
|
||||
raise NotImplementedError
|
||||
def gradients_X_diag(self, dL_dK, X):
|
||||
def gradients_X_diag(self, dL_dKdiag, X):
|
||||
raise NotImplementedError
|
||||
|
||||
|
||||
def update_gradients_full(self, dL_dK, X, X2):
|
||||
"""Set the gradients of all parameters when doing full (N) inference."""
|
||||
raise NotImplementedError
|
||||
|
||||
def update_gradients_diag(self, dL_dKdiag, X):
|
||||
"""Set the gradients for all parameters for the derivative of the diagonal of the covariance w.r.t the kernel parameters."""
|
||||
raise NotImplementedError
|
||||
|
||||
def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
|
||||
"""
|
||||
Set the gradients of all parameters when doing inference with
|
||||
|
|
|
|||
|
|
@ -57,7 +57,7 @@ class Stationary(Kern):
|
|||
if lengthscale.size != input_dim:
|
||||
lengthscale = np.ones(input_dim)*lengthscale
|
||||
else:
|
||||
lengthscale = np.ones(self.input_dim)
|
||||
lengthscale = np.ones(self.input_dim)
|
||||
self.lengthscale = Param('lengthscale', lengthscale, Logexp())
|
||||
self.variance = Param('variance', variance, Logexp())
|
||||
assert self.variance.size==1
|
||||
|
|
@ -85,12 +85,14 @@ class Stationary(Kern):
|
|||
Compute the Euclidean distance between each row of X and X2, or between
|
||||
each pair of rows of X if X2 is None.
|
||||
"""
|
||||
#X, = self._slice_X(X)
|
||||
if X2 is None:
|
||||
Xsq = np.sum(np.square(X),1)
|
||||
r2 = -2.*tdot(X) + (Xsq[:,None] + Xsq[None,:])
|
||||
util.diag.view(r2)[:,]= 0. # force diagonal to be zero: sometimes numerically a little negative
|
||||
return np.sqrt(r2)
|
||||
else:
|
||||
#X2, = self._slice_X(X2)
|
||||
X1sq = np.sum(np.square(X),1)
|
||||
X2sq = np.sum(np.square(X2),1)
|
||||
return np.sqrt(-2.*np.dot(X, X2.T) + (X1sq[:,None] + X2sq[None,:]))
|
||||
|
|
@ -124,7 +126,6 @@ class Stationary(Kern):
|
|||
self.lengthscale.gradient = 0.
|
||||
|
||||
def update_gradients_full(self, dL_dK, X, X2=None):
|
||||
|
||||
self.variance.gradient = np.einsum('ij,ij,i', self.K(X, X2), dL_dK, 1./self.variance)
|
||||
|
||||
#now the lengthscale gradient(s)
|
||||
|
|
@ -136,7 +137,7 @@ class Stationary(Kern):
|
|||
#self.lengthscale.gradient = -((dL_dr*rinv)[:,:,None]*x_xl3).sum(0).sum(0)/self.lengthscale**3
|
||||
tmp = dL_dr*self._inv_dist(X, X2)
|
||||
if X2 is None: X2 = X
|
||||
self.lengthscale.gradient = np.array([np.einsum('ij,ij,...', tmp, np.square(X[:,q:q+1] - X2[:,q:q+1].T), -1./self.lengthscale[q]**3) for q in xrange(self.input_dim)])
|
||||
self.lengthscale.gradient = np.array([np.einsum('ij,ij,...', tmp, np.square(self._slice_X(X)[:,q:q+1] - self._slice_X(X2)[:,q:q+1].T), -1./self.lengthscale[q]**3) for q in xrange(self.input_dim)])
|
||||
else:
|
||||
r = self._scaled_dist(X, X2)
|
||||
self.lengthscale.gradient = -np.sum(dL_dr*r)/self.lengthscale
|
||||
|
|
@ -176,7 +177,6 @@ class Stationary(Kern):
|
|||
ret = np.empty(X.shape, dtype=np.float64)
|
||||
[np.einsum('ij,ij->i', tmp, X[:,q][:,None]-X2[:,q][None,:], out=ret[:,q]) for q in xrange(self.input_dim)]
|
||||
ret /= self.lengthscale**2
|
||||
|
||||
return ret
|
||||
|
||||
def gradients_X_diag(self, dL_dKdiag, X):
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue