mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-05-05 01:32:40 +02:00
removed slices from models
Slices are now handled by special indexing kern parts, such as coregionalisation and independent_outputs. The old slicing functionality has been removed simply to clean up the code a little. Note that input_slices still exist (and will continue to be useful) in kern.py. They do need a little work, though, for the psi-statistics.
This commit is contained in:
parent
ac842d51e6
commit
52ba8e4ba3
7 changed files with 103 additions and 175 deletions
152
GPy/kern/kern.py
152
GPy/kern/kern.py
|
|
@ -13,15 +13,9 @@ from prod import prod
|
|||
class kern(parameterised):
|
||||
def __init__(self, D, parts=[], input_slices=None):
|
||||
"""
|
||||
This kernel does 'compound' structures.
|
||||
This is the main kernel class for GPy. It handles multiple (additive) kernel functions, and keeps track of variaous things like which parameters live where.
|
||||
|
||||
The compund structure enables many features of GPy, including
|
||||
- Hierarchical models
|
||||
- Correleated output models
|
||||
- multi-view learning
|
||||
|
||||
Hadamard product and outer-product kernels will require a new class.
|
||||
This feature is currently WONTFIX. for small number sof inputs, you can use the sympy kernel for this.
|
||||
The technical code for kernels is divided into _parts_ (see e.g. rbf.py). This obnject contains a list of parts, which are computed additively. For multiplication, special _prod_ parts are used.
|
||||
|
||||
:param D: The dimensioality of the kernel's input space
|
||||
:type D: int
|
||||
|
|
@ -94,34 +88,6 @@ class kern(parameterised):
|
|||
self.param_slices.append(slice(count, count + p.Nparam))
|
||||
count += p.Nparam
|
||||
|
||||
def _process_slices(self, slices1=None, slices2=None):
|
||||
"""
|
||||
Format the slices so that they can easily be used.
|
||||
Both slices can be any of three things:
|
||||
- If None, the new points covary through every kernel part (default)
|
||||
- If a list of slices, the i^th slice specifies which data are affected by the i^th kernel part
|
||||
- If a list of booleans, specifying which kernel parts are active
|
||||
|
||||
if the second arg is False, return only slices1
|
||||
|
||||
returns actual lists of slice objects
|
||||
"""
|
||||
if slices1 is None:
|
||||
slices1 = [slice(None)] * self.Nparts
|
||||
elif all([type(s_i) is bool for s_i in slices1]):
|
||||
slices1 = [slice(None) if s_i else slice(0) for s_i in slices1]
|
||||
else:
|
||||
assert all([type(s_i) is slice for s_i in slices1]), "invalid slice objects"
|
||||
if slices2 is None:
|
||||
slices2 = [slice(None)] * self.Nparts
|
||||
elif slices2 is False:
|
||||
return slices1
|
||||
elif all([type(s_i) is bool for s_i in slices2]):
|
||||
slices2 = [slice(None) if s_i else slice(0) for s_i in slices2]
|
||||
else:
|
||||
assert all([type(s_i) is slice for s_i in slices2]), "invalid slice objects"
|
||||
return slices1, slices2
|
||||
|
||||
def __add__(self, other):
|
||||
assert self.D == other.D
|
||||
newkern = kern(self.D, self.parts + other.parts, self.input_slices + other.input_slices)
|
||||
|
|
@ -142,7 +108,7 @@ class kern(parameterised):
|
|||
:param other: the other kernel to be added
|
||||
:type other: GPy.kern
|
||||
"""
|
||||
return self +other
|
||||
return self + other
|
||||
|
||||
def add_orthogonal(self, other):
|
||||
"""
|
||||
|
|
@ -285,18 +251,19 @@ class kern(parameterised):
|
|||
|
||||
return sum([[name + '_' + n for n in k._get_param_names()] for name, k in zip(names, self.parts)], [])
|
||||
|
||||
def K(self, X, X2=None, slices1=None, slices2=None):
|
||||
def K(self, X, X2=None, which_parts='all'):
|
||||
if which_parts=='all':
|
||||
which_parts = [True]*self.Nparts
|
||||
assert X.shape[1] == self.D
|
||||
slices1, slices2 = self._process_slices(slices1, slices2)
|
||||
if X2 is None:
|
||||
target = np.zeros((X.shape[0], X.shape[0]))
|
||||
[p.K(X[s1, i_s], None, target=target[s1, s2]) for p, i_s, s1, s2 in zip(self.parts, self.input_slices, slices1, slices2)]
|
||||
[p.K(X[:, i_s], None, target=target) for p, i_s, part_i_used in zip(self.parts, self.input_slices, which_parts) if part_i_used]
|
||||
else:
|
||||
target = np.zeros((X.shape[0], X2.shape[0]))
|
||||
[p.K(X[s1, i_s], X2[s2, i_s], target=target[s1, s2]) for p, i_s, s1, s2 in zip(self.parts, self.input_slices, slices1, slices2)]
|
||||
[p.K(X[:, i_s], X2[:,i_s], target=target) for p, i_s, part_i_used in zip(self.parts, self.input_slices, which_parts) if part_i_used]
|
||||
return target
|
||||
|
||||
def dK_dtheta(self, dL_dK, X, X2=None, slices1=None, slices2=None):
|
||||
def dK_dtheta(self, dL_dK, X, X2=None):
|
||||
"""
|
||||
:param dL_dK: An array of dL_dK derivaties, dL_dK
|
||||
:type dL_dK: Np.ndarray (N x M)
|
||||
|
|
@ -304,109 +271,94 @@ class kern(parameterised):
|
|||
:type X: np.ndarray (N x D)
|
||||
:param X2: Observed dara inputs (optional, defaults to X)
|
||||
:type X2: np.ndarray (M x D)
|
||||
:param slices1: a slice object for each kernel part, describing which data are affected by each kernel part
|
||||
:type slices1: list of slice objects, or list of booleans
|
||||
:param slices2: slices for X2
|
||||
"""
|
||||
assert X.shape[1] == self.D
|
||||
slices1, slices2 = self._process_slices(slices1, slices2)
|
||||
target = np.zeros(self.Nparam)
|
||||
if X2 is None:
|
||||
[p.dK_dtheta(dL_dK[s1, s2], X[s1, i_s], None, target[ps]) for p, i_s, ps, s1, s2 in zip(self.parts, self.input_slices,self.param_slices, slices1, slices2)]
|
||||
[p.dK_dtheta(dL_dK, X[:, i_s], None, target[ps]) for p, i_s, ps, in zip(self.parts, self.input_slices, self.param_slices)]
|
||||
else:
|
||||
[p.dK_dtheta(dL_dK[s1, s2], X[s1, i_s], X2[s2, i_s], target[ps]) for p, i_s, ps, s1, s2 in zip(self.parts, self.input_slices,self.param_slices, slices1, slices2)]
|
||||
|
||||
[p.dK_dtheta(dL_dK, X[:, i_s], X2[:, i_s], target[ps]) for p, i_s, ps, in zip(self.parts, self.input_slices, self.param_slices)]
|
||||
|
||||
return self._transform_gradients(target)
|
||||
|
||||
def dK_dX(self, dL_dK, X, X2=None, slices1=None, slices2=None):
|
||||
def dK_dX(self, dL_dK, X, X2=None):
|
||||
if X2 is None:
|
||||
X2 = X
|
||||
slices1, slices2 = self._process_slices(slices1, slices2)
|
||||
target = np.zeros_like(X)
|
||||
[p.dK_dX(dL_dK[s1, s2], X[s1, i_s], X2[s2, i_s], target[s1, i_s]) for p, i_s, s1, s2 in zip(self.parts, self.input_slices, slices1, slices2)]
|
||||
if X2 is None:
|
||||
[p.dK_dX(dL_dK, X[:, i_s], None, target[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)]
|
||||
else:
|
||||
[p.dK_dX(dL_dK, X[:, i_s], X2[:, i_s], target[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)]
|
||||
return target
|
||||
|
||||
def Kdiag(self, X, slices=None):
|
||||
def Kdiag(self, X, which_parts='all'):
|
||||
if which_parts=='all':
|
||||
which_parts = [True]*self.Nparts
|
||||
assert X.shape[1] == self.D
|
||||
slices = self._process_slices(slices, False)
|
||||
target = np.zeros(X.shape[0])
|
||||
[p.Kdiag(X[s, i_s], target=target[s]) for p, i_s, s in zip(self.parts, self.input_slices, slices)]
|
||||
[p.Kdiag(X[:, i_s], target=target) for p, i_s in zip(self.parts, self.input_slices)]
|
||||
return target
|
||||
|
||||
def dKdiag_dtheta(self, dL_dKdiag, X, slices=None):
|
||||
def dKdiag_dtheta(self, dL_dKdiag, X):
|
||||
assert X.shape[1] == self.D
|
||||
assert len(dL_dKdiag.shape) == 1
|
||||
assert dL_dKdiag.size == X.shape[0]
|
||||
slices = self._process_slices(slices, False)
|
||||
target = np.zeros(self.Nparam)
|
||||
[p.dKdiag_dtheta(dL_dKdiag[s], X[s, i_s], target[ps]) for p, i_s, s, ps in zip(self.parts, self.input_slices, slices, self.param_slices)]
|
||||
[p.dKdiag_dtheta(dL_dKdiag, X[:, i_s], target[ps]) for p, i_s, ps in zip(self.parts, self.input_slices, self.param_slices)]
|
||||
return self._transform_gradients(target)
|
||||
|
||||
def dKdiag_dX(self, dL_dKdiag, X, slices=None):
|
||||
def dKdiag_dX(self, dL_dKdiag, X):
|
||||
assert X.shape[1] == self.D
|
||||
slices = self._process_slices(slices, False)
|
||||
target = np.zeros_like(X)
|
||||
[p.dKdiag_dX(dL_dKdiag[s], X[s, i_s], target[s, i_s]) for p, i_s, s in zip(self.parts, self.input_slices, slices)]
|
||||
[p.dKdiag_dX(dL_dKdiag, X[:, i_s], target[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)]
|
||||
return target
|
||||
|
||||
def psi0(self, Z, mu, S, slices=None):
|
||||
slices = self._process_slices(slices, False)
|
||||
def psi0(self, Z, mu, S):
|
||||
target = np.zeros(mu.shape[0])
|
||||
[p.psi0(Z, mu[s], S[s], target[s]) for p, s in zip(self.parts, slices)]
|
||||
[p.psi0(Z[:,i_s], mu[:,i_s], S[:,i_s], target) for p, i_s in zip(self.parts, self.input_slices)]
|
||||
return target
|
||||
|
||||
def dpsi0_dtheta(self, dL_dpsi0, Z, mu, S, slices=None):
|
||||
slices = self._process_slices(slices, False)
|
||||
def dpsi0_dtheta(self, dL_dpsi0, Z, mu, S):
|
||||
target = np.zeros(self.Nparam)
|
||||
[p.dpsi0_dtheta(dL_dpsi0[s], Z, mu[s], S[s], target[ps]) for p, ps, s in zip(self.parts, self.param_slices, slices)]
|
||||
[p.dpsi0_dtheta(dL_dpsi0, Z[:,i_s], mu[:,i_s], S[:,i_s], target[ps]) for p, ps, i_s in zip(self.parts, self.param_slices, self.input_slices)]
|
||||
return self._transform_gradients(target)
|
||||
|
||||
def dpsi0_dmuS(self, dL_dpsi0, Z, mu, S, slices=None):
|
||||
slices = self._process_slices(slices, False)
|
||||
def dpsi0_dmuS(self, dL_dpsi0, Z, mu, S):
|
||||
target_mu, target_S = np.zeros_like(mu), np.zeros_like(S)
|
||||
[p.dpsi0_dmuS(dL_dpsi0, Z, mu[s], S[s], target_mu[s], target_S[s]) for p, s in zip(self.parts, slices)]
|
||||
[p.dpsi0_dmuS(dL_dpsi0, Z[:,i_s], mu[:,i_s], S[:,i_s], target_mu[:,i_s], target_S[:,i_s]) for p, i_s in zip(self.parts, self.input_slices)]
|
||||
return target_mu, target_S
|
||||
|
||||
def psi1(self, Z, mu, S, slices1=None, slices2=None):
|
||||
"""Think N,M,Q """
|
||||
slices1, slices2 = self._process_slices(slices1, slices2)
|
||||
def psi1(self, Z, mu, S):
|
||||
target = np.zeros((mu.shape[0], Z.shape[0]))
|
||||
[p.psi1(Z[s2], mu[s1], S[s1], target[s1, s2]) for p, s1, s2 in zip(self.parts, slices1, slices2)]
|
||||
[p.psi1(Z[:,i_s], mu[:,i_s], S[:,i_s], target) for p, i_s in zip(self.parts, self.input_slices)]
|
||||
return target
|
||||
|
||||
def dpsi1_dtheta(self, dL_dpsi1, Z, mu, S, slices1=None, slices2=None):
|
||||
"""N,M,(Ntheta)"""
|
||||
slices1, slices2 = self._process_slices(slices1, slices2)
|
||||
def dpsi1_dtheta(self, dL_dpsi1, Z, mu, S):
|
||||
target = np.zeros((self.Nparam))
|
||||
[p.dpsi1_dtheta(dL_dpsi1[s2, s1], Z[s2, i_s], mu[s1, i_s], S[s1, i_s], target[ps]) for p, ps, s1, s2, i_s in zip(self.parts, self.param_slices, slices1, slices2, self.input_slices)]
|
||||
[p.dpsi1_dtheta(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, ps, i_s in zip(self.parts, self.param_slices, self.input_slices)]
|
||||
return self._transform_gradients(target)
|
||||
|
||||
def dpsi1_dZ(self, dL_dpsi1, Z, mu, S, slices1=None, slices2=None):
|
||||
"""N,M,Q"""
|
||||
slices1, slices2 = self._process_slices(slices1, slices2)
|
||||
def dpsi1_dZ(self, dL_dpsi1, Z, mu, S):
|
||||
target = np.zeros_like(Z)
|
||||
[p.dpsi1_dZ(dL_dpsi1[s2, s1], Z[s2, i_s], mu[s1, i_s], S[s1, i_s], target[s2, i_s]) for p, i_s, s1, s2 in zip(self.parts, self.input_slices, slices1, slices2)]
|
||||
[p.dpsi1_dZ(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)]
|
||||
return target
|
||||
|
||||
def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S, slices1=None, slices2=None):
|
||||
def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S):
|
||||
"""return shapes are N,M,Q"""
|
||||
slices1, slices2 = self._process_slices(slices1, slices2)
|
||||
target_mu, target_S = np.zeros((2, mu.shape[0], mu.shape[1]))
|
||||
[p.dpsi1_dmuS(dL_dpsi1[s2, s1], Z[s2, i_s], mu[s1, i_s], S[s1, i_s], target_mu[s1, i_s], target_S[s1, i_s]) for p, i_s, s1, s2 in zip(self.parts, self.input_slices, slices1, slices2)]
|
||||
[p.dpsi1_dmuS(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)]
|
||||
return target_mu, target_S
|
||||
|
||||
def psi2(self, Z, mu, S, slices1=None, slices2=None):
|
||||
def psi2(self, Z, mu, S):
|
||||
"""
|
||||
:param Z: np.ndarray of inducing inputs (M x Q)
|
||||
:param mu, S: np.ndarrays of means and variances (each N x Q)
|
||||
:returns psi2: np.ndarray (N,M,M)
|
||||
"""
|
||||
target = np.zeros((mu.shape[0], Z.shape[0], Z.shape[0]))
|
||||
slices1, slices2 = self._process_slices(slices1, slices2)
|
||||
[p.psi2(Z[s2, i_s], mu[s1, i_s], S[s1, i_s], target[s1, s2, s2]) for p, i_s, s1, s2 in zip(self.parts, self.input_slices, slices1, slices2)]
|
||||
[p.psi2(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self.parts, self.input_slices)]
|
||||
|
||||
# compute the "cross" terms
|
||||
#TODO: input_slices needed
|
||||
for p1, p2 in itertools.combinations(self.parts, 2):
|
||||
# white doesn;t combine with anything
|
||||
if p1.name == 'white' or p2.name == 'white':
|
||||
|
|
@ -434,14 +386,12 @@ class kern(parameterised):
|
|||
raise NotImplementedError, "psi2 cannot be computed for this kernel"
|
||||
return target
|
||||
|
||||
def dpsi2_dtheta(self, dL_dpsi2, Z, mu, S, slices1=None, slices2=None):
|
||||
"""Returns shape (N,M,M,Ntheta)"""
|
||||
slices1, slices2 = self._process_slices(slices1, slices2)
|
||||
def dpsi2_dtheta(self, dL_dpsi2, Z, mu, S):
|
||||
target = np.zeros(self.Nparam)
|
||||
[p.dpsi2_dtheta(dL_dpsi2[s1, s2, s2], Z[s2, i_s], mu[s1, i_s], S[s1, i_s], target[ps]) for p, i_s, s1, s2, ps in zip(self.parts, self.input_slices, slices1, slices2, self.param_slices)]
|
||||
[p.dpsi2_dtheta(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, i_s, ps in zip(self.parts, self.input_slices, self.param_slices)]
|
||||
|
||||
# compute the "cross" terms
|
||||
# TODO: better looping
|
||||
# TODO: better looping, input_slices
|
||||
for i1, i2 in itertools.combinations(range(len(self.parts)), 2):
|
||||
p1, p2 = self.parts[i1], self.parts[i2]
|
||||
# ipsl1, ipsl2 = self.input_slices[i1], self.input_slices[i2]
|
||||
|
|
@ -478,12 +428,12 @@ class kern(parameterised):
|
|||
|
||||
return self._transform_gradients(target)
|
||||
|
||||
def dpsi2_dZ(self, dL_dpsi2, Z, mu, S, slices1=None, slices2=None):
|
||||
slices1, slices2 = self._process_slices(slices1, slices2)
|
||||
def dpsi2_dZ(self, dL_dpsi2, Z, mu, S):
|
||||
target = np.zeros_like(Z)
|
||||
[p.dpsi2_dZ(dL_dpsi2[s1, s2, s2], Z[s2, i_s], mu[s1, i_s], S[s1, i_s], target[s2, i_s]) for p, i_s, s1, s2 in zip(self.parts, self.input_slices, slices1, slices2)]
|
||||
[p.dpsi2_dZ(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)]
|
||||
|
||||
# compute the "cross" terms
|
||||
#TODO: we need input_slices here.
|
||||
for p1, p2 in itertools.combinations(self.parts, 2):
|
||||
# white doesn;t combine with anything
|
||||
if p1.name == 'white' or p2.name == 'white':
|
||||
|
|
@ -506,16 +456,14 @@ class kern(parameterised):
|
|||
else:
|
||||
raise NotImplementedError, "psi2 cannot be computed for this kernel"
|
||||
|
||||
|
||||
return target * 2.
|
||||
|
||||
def dpsi2_dmuS(self, dL_dpsi2, Z, mu, S, slices1=None, slices2=None):
|
||||
"""return shapes are N,M,M,Q"""
|
||||
slices1, slices2 = self._process_slices(slices1, slices2)
|
||||
def dpsi2_dmuS(self, dL_dpsi2, Z, mu, S):
|
||||
target_mu, target_S = np.zeros((2, mu.shape[0], mu.shape[1]))
|
||||
[p.dpsi2_dmuS(dL_dpsi2[s1, s2, s2], Z[s2, i_s], mu[s1, i_s], S[s1, i_s], target_mu[s1, i_s], target_S[s1, i_s]) for p, i_s, s1, s2 in zip(self.parts, self.input_slices, slices1, slices2)]
|
||||
[p.dpsi2_dmuS(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)]
|
||||
|
||||
# compute the "cross" terms
|
||||
#TODO: we need input_slices here.
|
||||
for p1, p2 in itertools.combinations(self.parts, 2):
|
||||
# white doesn;t combine with anything
|
||||
if p1.name == 'white' or p2.name == 'white':
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue