remo0ved slices from models

slices are now handles by special indexing kern parts, such as
coregionalisation, independent_outputs. The old slicing functionality
has been removed simply to clean up the code a little.

Now that input_slices still exist (and will continue to be useful) in
kern.py. They do need a little work though, for the psi-statistics
This commit is contained in:
James Hensman 2013-04-28 17:22:04 +01:00
parent ac842d51e6
commit 52ba8e4ba3
7 changed files with 103 additions and 175 deletions

View file

@ -13,15 +13,9 @@ from prod import prod
class kern(parameterised):
def __init__(self, D, parts=[], input_slices=None):
"""
This kernel does 'compound' structures.
This is the main kernel class for GPy. It handles multiple (additive) kernel functions, and keeps track of variaous things like which parameters live where.
The compund structure enables many features of GPy, including
- Hierarchical models
- Correleated output models
- multi-view learning
Hadamard product and outer-product kernels will require a new class.
This feature is currently WONTFIX. for small number sof inputs, you can use the sympy kernel for this.
The technical code for kernels is divided into _parts_ (see e.g. rbf.py). This obnject contains a list of parts, which are computed additively. For multiplication, special _prod_ parts are used.
:param D: The dimensioality of the kernel's input space
:type D: int
@ -94,34 +88,6 @@ class kern(parameterised):
self.param_slices.append(slice(count, count + p.Nparam))
count += p.Nparam
def _process_slices(self, slices1=None, slices2=None):
"""
Format the slices so that they can easily be used.
Both slices can be any of three things:
- If None, the new points covary through every kernel part (default)
- If a list of slices, the i^th slice specifies which data are affected by the i^th kernel part
- If a list of booleans, specifying which kernel parts are active
if the second arg is False, return only slices1
returns actual lists of slice objects
"""
if slices1 is None:
slices1 = [slice(None)] * self.Nparts
elif all([type(s_i) is bool for s_i in slices1]):
slices1 = [slice(None) if s_i else slice(0) for s_i in slices1]
else:
assert all([type(s_i) is slice for s_i in slices1]), "invalid slice objects"
if slices2 is None:
slices2 = [slice(None)] * self.Nparts
elif slices2 is False:
return slices1
elif all([type(s_i) is bool for s_i in slices2]):
slices2 = [slice(None) if s_i else slice(0) for s_i in slices2]
else:
assert all([type(s_i) is slice for s_i in slices2]), "invalid slice objects"
return slices1, slices2
def __add__(self, other):
assert self.D == other.D
newkern = kern(self.D, self.parts + other.parts, self.input_slices + other.input_slices)
@ -142,7 +108,7 @@ class kern(parameterised):
:param other: the other kernel to be added
:type other: GPy.kern
"""
return self +other
return self + other
def add_orthogonal(self, other):
"""
@ -285,18 +251,19 @@ class kern(parameterised):
return sum([[name + '_' + n for n in k._get_param_names()] for name, k in zip(names, self.parts)], [])
def K(self, X, X2=None, slices1=None, slices2=None):
def K(self, X, X2=None, which_parts='all'):
if which_parts=='all':
which_parts = [True]*self.Nparts
assert X.shape[1] == self.D
slices1, slices2 = self._process_slices(slices1, slices2)
if X2 is None:
target = np.zeros((X.shape[0], X.shape[0]))
[p.K(X[s1, i_s], None, target=target[s1, s2]) for p, i_s, s1, s2 in zip(self.parts, self.input_slices, slices1, slices2)]
[p.K(X[:, i_s], None, target=target) for p, i_s, part_i_used in zip(self.parts, self.input_slices, which_parts) if part_i_used]
else:
target = np.zeros((X.shape[0], X2.shape[0]))
[p.K(X[s1, i_s], X2[s2, i_s], target=target[s1, s2]) for p, i_s, s1, s2 in zip(self.parts, self.input_slices, slices1, slices2)]
[p.K(X[:, i_s], X2[:,i_s], target=target) for p, i_s, part_i_used in zip(self.parts, self.input_slices, which_parts) if part_i_used]
return target
def dK_dtheta(self, dL_dK, X, X2=None, slices1=None, slices2=None):
def dK_dtheta(self, dL_dK, X, X2=None):
"""
:param dL_dK: An array of dL_dK derivaties, dL_dK
:type dL_dK: Np.ndarray (N x M)
@ -304,109 +271,94 @@ class kern(parameterised):
:type X: np.ndarray (N x D)
:param X2: Observed dara inputs (optional, defaults to X)
:type X2: np.ndarray (M x D)
:param slices1: a slice object for each kernel part, describing which data are affected by each kernel part
:type slices1: list of slice objects, or list of booleans
:param slices2: slices for X2
"""
assert X.shape[1] == self.D
slices1, slices2 = self._process_slices(slices1, slices2)
target = np.zeros(self.Nparam)
if X2 is None:
[p.dK_dtheta(dL_dK[s1, s2], X[s1, i_s], None, target[ps]) for p, i_s, ps, s1, s2 in zip(self.parts, self.input_slices,self.param_slices, slices1, slices2)]
[p.dK_dtheta(dL_dK, X[:, i_s], None, target[ps]) for p, i_s, ps, in zip(self.parts, self.input_slices, self.param_slices)]
else:
[p.dK_dtheta(dL_dK[s1, s2], X[s1, i_s], X2[s2, i_s], target[ps]) for p, i_s, ps, s1, s2 in zip(self.parts, self.input_slices,self.param_slices, slices1, slices2)]
[p.dK_dtheta(dL_dK, X[:, i_s], X2[:, i_s], target[ps]) for p, i_s, ps, in zip(self.parts, self.input_slices, self.param_slices)]
return self._transform_gradients(target)
def dK_dX(self, dL_dK, X, X2=None, slices1=None, slices2=None):
def dK_dX(self, dL_dK, X, X2=None):
if X2 is None:
X2 = X
slices1, slices2 = self._process_slices(slices1, slices2)
target = np.zeros_like(X)
[p.dK_dX(dL_dK[s1, s2], X[s1, i_s], X2[s2, i_s], target[s1, i_s]) for p, i_s, s1, s2 in zip(self.parts, self.input_slices, slices1, slices2)]
if X2 is None:
[p.dK_dX(dL_dK, X[:, i_s], None, target[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)]
else:
[p.dK_dX(dL_dK, X[:, i_s], X2[:, i_s], target[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)]
return target
def Kdiag(self, X, slices=None):
def Kdiag(self, X, which_parts='all'):
if which_parts=='all':
which_parts = [True]*self.Nparts
assert X.shape[1] == self.D
slices = self._process_slices(slices, False)
target = np.zeros(X.shape[0])
[p.Kdiag(X[s, i_s], target=target[s]) for p, i_s, s in zip(self.parts, self.input_slices, slices)]
[p.Kdiag(X[:, i_s], target=target) for p, i_s in zip(self.parts, self.input_slices)]
return target
def dKdiag_dtheta(self, dL_dKdiag, X, slices=None):
def dKdiag_dtheta(self, dL_dKdiag, X):
assert X.shape[1] == self.D
assert len(dL_dKdiag.shape) == 1
assert dL_dKdiag.size == X.shape[0]
slices = self._process_slices(slices, False)
target = np.zeros(self.Nparam)
[p.dKdiag_dtheta(dL_dKdiag[s], X[s, i_s], target[ps]) for p, i_s, s, ps in zip(self.parts, self.input_slices, slices, self.param_slices)]
[p.dKdiag_dtheta(dL_dKdiag, X[:, i_s], target[ps]) for p, i_s, ps in zip(self.parts, self.input_slices, self.param_slices)]
return self._transform_gradients(target)
def dKdiag_dX(self, dL_dKdiag, X, slices=None):
def dKdiag_dX(self, dL_dKdiag, X):
assert X.shape[1] == self.D
slices = self._process_slices(slices, False)
target = np.zeros_like(X)
[p.dKdiag_dX(dL_dKdiag[s], X[s, i_s], target[s, i_s]) for p, i_s, s in zip(self.parts, self.input_slices, slices)]
[p.dKdiag_dX(dL_dKdiag, X[:, i_s], target[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)]
return target
def psi0(self, Z, mu, S, slices=None):
slices = self._process_slices(slices, False)
def psi0(self, Z, mu, S):
target = np.zeros(mu.shape[0])
[p.psi0(Z, mu[s], S[s], target[s]) for p, s in zip(self.parts, slices)]
[p.psi0(Z[:,i_s], mu[:,i_s], S[:,i_s], target) for p, i_s in zip(self.parts, self.input_slices)]
return target
def dpsi0_dtheta(self, dL_dpsi0, Z, mu, S, slices=None):
slices = self._process_slices(slices, False)
def dpsi0_dtheta(self, dL_dpsi0, Z, mu, S):
target = np.zeros(self.Nparam)
[p.dpsi0_dtheta(dL_dpsi0[s], Z, mu[s], S[s], target[ps]) for p, ps, s in zip(self.parts, self.param_slices, slices)]
[p.dpsi0_dtheta(dL_dpsi0, Z[:,i_s], mu[:,i_s], S[:,i_s], target[ps]) for p, ps, i_s in zip(self.parts, self.param_slices, self.input_slices)]
return self._transform_gradients(target)
def dpsi0_dmuS(self, dL_dpsi0, Z, mu, S, slices=None):
slices = self._process_slices(slices, False)
def dpsi0_dmuS(self, dL_dpsi0, Z, mu, S):
target_mu, target_S = np.zeros_like(mu), np.zeros_like(S)
[p.dpsi0_dmuS(dL_dpsi0, Z, mu[s], S[s], target_mu[s], target_S[s]) for p, s in zip(self.parts, slices)]
[p.dpsi0_dmuS(dL_dpsi0, Z[:,i_s], mu[:,i_s], S[:,i_s], target_mu[:,i_s], target_S[:,i_s]) for p, i_s in zip(self.parts, self.input_slices)]
return target_mu, target_S
def psi1(self, Z, mu, S, slices1=None, slices2=None):
"""Think N,M,Q """
slices1, slices2 = self._process_slices(slices1, slices2)
def psi1(self, Z, mu, S):
target = np.zeros((mu.shape[0], Z.shape[0]))
[p.psi1(Z[s2], mu[s1], S[s1], target[s1, s2]) for p, s1, s2 in zip(self.parts, slices1, slices2)]
[p.psi1(Z[:,i_s], mu[:,i_s], S[:,i_s], target) for p, i_s in zip(self.parts, self.input_slices)]
return target
def dpsi1_dtheta(self, dL_dpsi1, Z, mu, S, slices1=None, slices2=None):
"""N,M,(Ntheta)"""
slices1, slices2 = self._process_slices(slices1, slices2)
def dpsi1_dtheta(self, dL_dpsi1, Z, mu, S):
target = np.zeros((self.Nparam))
[p.dpsi1_dtheta(dL_dpsi1[s2, s1], Z[s2, i_s], mu[s1, i_s], S[s1, i_s], target[ps]) for p, ps, s1, s2, i_s in zip(self.parts, self.param_slices, slices1, slices2, self.input_slices)]
[p.dpsi1_dtheta(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, ps, i_s in zip(self.parts, self.param_slices, self.input_slices)]
return self._transform_gradients(target)
def dpsi1_dZ(self, dL_dpsi1, Z, mu, S, slices1=None, slices2=None):
"""N,M,Q"""
slices1, slices2 = self._process_slices(slices1, slices2)
def dpsi1_dZ(self, dL_dpsi1, Z, mu, S):
target = np.zeros_like(Z)
[p.dpsi1_dZ(dL_dpsi1[s2, s1], Z[s2, i_s], mu[s1, i_s], S[s1, i_s], target[s2, i_s]) for p, i_s, s1, s2 in zip(self.parts, self.input_slices, slices1, slices2)]
[p.dpsi1_dZ(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)]
return target
def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S, slices1=None, slices2=None):
def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S):
"""return shapes are N,M,Q"""
slices1, slices2 = self._process_slices(slices1, slices2)
target_mu, target_S = np.zeros((2, mu.shape[0], mu.shape[1]))
[p.dpsi1_dmuS(dL_dpsi1[s2, s1], Z[s2, i_s], mu[s1, i_s], S[s1, i_s], target_mu[s1, i_s], target_S[s1, i_s]) for p, i_s, s1, s2 in zip(self.parts, self.input_slices, slices1, slices2)]
[p.dpsi1_dmuS(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)]
return target_mu, target_S
def psi2(self, Z, mu, S, slices1=None, slices2=None):
def psi2(self, Z, mu, S):
"""
:param Z: np.ndarray of inducing inputs (M x Q)
:param mu, S: np.ndarrays of means and variances (each N x Q)
:returns psi2: np.ndarray (N,M,M)
"""
target = np.zeros((mu.shape[0], Z.shape[0], Z.shape[0]))
slices1, slices2 = self._process_slices(slices1, slices2)
[p.psi2(Z[s2, i_s], mu[s1, i_s], S[s1, i_s], target[s1, s2, s2]) for p, i_s, s1, s2 in zip(self.parts, self.input_slices, slices1, slices2)]
[p.psi2(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self.parts, self.input_slices)]
# compute the "cross" terms
#TODO: input_slices needed
for p1, p2 in itertools.combinations(self.parts, 2):
# white doesn;t combine with anything
if p1.name == 'white' or p2.name == 'white':
@ -434,14 +386,12 @@ class kern(parameterised):
raise NotImplementedError, "psi2 cannot be computed for this kernel"
return target
def dpsi2_dtheta(self, dL_dpsi2, Z, mu, S, slices1=None, slices2=None):
"""Returns shape (N,M,M,Ntheta)"""
slices1, slices2 = self._process_slices(slices1, slices2)
def dpsi2_dtheta(self, dL_dpsi2, Z, mu, S):
target = np.zeros(self.Nparam)
[p.dpsi2_dtheta(dL_dpsi2[s1, s2, s2], Z[s2, i_s], mu[s1, i_s], S[s1, i_s], target[ps]) for p, i_s, s1, s2, ps in zip(self.parts, self.input_slices, slices1, slices2, self.param_slices)]
[p.dpsi2_dtheta(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, i_s, ps in zip(self.parts, self.input_slices, self.param_slices)]
# compute the "cross" terms
# TODO: better looping
# TODO: better looping, input_slices
for i1, i2 in itertools.combinations(range(len(self.parts)), 2):
p1, p2 = self.parts[i1], self.parts[i2]
# ipsl1, ipsl2 = self.input_slices[i1], self.input_slices[i2]
@ -478,12 +428,12 @@ class kern(parameterised):
return self._transform_gradients(target)
def dpsi2_dZ(self, dL_dpsi2, Z, mu, S, slices1=None, slices2=None):
slices1, slices2 = self._process_slices(slices1, slices2)
def dpsi2_dZ(self, dL_dpsi2, Z, mu, S):
target = np.zeros_like(Z)
[p.dpsi2_dZ(dL_dpsi2[s1, s2, s2], Z[s2, i_s], mu[s1, i_s], S[s1, i_s], target[s2, i_s]) for p, i_s, s1, s2 in zip(self.parts, self.input_slices, slices1, slices2)]
[p.dpsi2_dZ(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)]
# compute the "cross" terms
#TODO: we need input_slices here.
for p1, p2 in itertools.combinations(self.parts, 2):
# white doesn;t combine with anything
if p1.name == 'white' or p2.name == 'white':
@ -506,16 +456,14 @@ class kern(parameterised):
else:
raise NotImplementedError, "psi2 cannot be computed for this kernel"
return target * 2.
def dpsi2_dmuS(self, dL_dpsi2, Z, mu, S, slices1=None, slices2=None):
"""return shapes are N,M,M,Q"""
slices1, slices2 = self._process_slices(slices1, slices2)
def dpsi2_dmuS(self, dL_dpsi2, Z, mu, S):
target_mu, target_S = np.zeros((2, mu.shape[0], mu.shape[1]))
[p.dpsi2_dmuS(dL_dpsi2[s1, s2, s2], Z[s2, i_s], mu[s1, i_s], S[s1, i_s], target_mu[s1, i_s], target_S[s1, i_s]) for p, i_s, s1, s2 in zip(self.parts, self.input_slices, slices1, slices2)]
[p.dpsi2_dmuS(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)]
# compute the "cross" terms
#TODO: we need input_slices here.
for p1, p2 in itertools.combinations(self.parts, 2):
# white doesn;t combine with anything
if p1.name == 'white' or p2.name == 'white':