slicing finished with independent outputs

This commit is contained in:
Max Zwiessele 2014-03-17 16:22:16 +00:00
parent 62d594d977
commit 19dc7cecf4
3 changed files with 115 additions and 54 deletions

View file

@ -64,7 +64,6 @@ class Add(CombinationKernel):
def gradients_X_diag(self, dL_dKdiag, X): def gradients_X_diag(self, dL_dKdiag, X):
target = np.zeros(X.shape) target = np.zeros(X.shape)
[target.__iadd__(p.gradients_X_diag(dL_dKdiag, X)) for p in self.parts] [target.__iadd__(p.gradients_X_diag(dL_dKdiag, X)) for p in self.parts]
#[target.__setitem__([Ellipsis, p.active_dims], target[:, p.active_dims]+p.gradients_X(dL_dK, X, X2)) for p in self.parts]
return target return target
def psi0(self, Z, variational_posterior): def psi0(self, Z, variational_posterior):

View file

@ -39,72 +39,102 @@ class IndependentOutputs(CombinationKernel):
The index of the functions is given by the last column in the input X The index of the functions is given by the last column in the input X
the rest of the columns of X are passed to the underlying kernel for computation (in blocks). the rest of the columns of X are passed to the underlying kernel for computation (in blocks).
:param kernels: either a kernel, or list of kernels to work with. If it is a list of kernels
the indices in the index_dim, index the kernels you gave!
""" """
def __init__(self, kern, index_dim=-1, name='independ'): def __init__(self, kernels, index_dim=-1, name='independ'):
assert isinstance(index_dim, int), "IndependentOutputs kernel is only defined with one input dimension being the indeces" assert isinstance(index_dim, int), "IndependentOutputs kernel is only defined with one input dimension being the indeces"
super(IndependentOutputs, self).__init__(kernels=[kern], extra_dims=[index_dim], name=name) if not isinstance(kernels, list):
self.single_kern = True
self.kern = kernels
kernels = [kernels]
else:
self.single_kern = False
self.kern = kernels
super(IndependentOutputs, self).__init__(kernels=kernels, extra_dims=[index_dim], name=name)
self.index_dim = index_dim self.index_dim = index_dim
self.kern = kern self.kerns = kernels if len(kernels) != 1 else itertools.repeat(kernels[0])
#self.add_parameters(self.kern)
def K(self,X ,X2=None): def K(self,X ,X2=None):
slices = index_to_slices(X[:,self.index_dim]) slices = index_to_slices(X[:,self.index_dim])
if X2 is None: if X2 is None:
target = np.zeros((X.shape[0], X.shape[0])) target = np.zeros((X.shape[0], X.shape[0]))
[[np.copyto(target[s,ss], self.kern.K(X[s,:], X[ss,:])) for s,ss in itertools.product(slices_i, slices_i)] for slices_i in slices] [[target.__setitem__((s,ss), kern.K(X[s,:], X[ss,:])) for s,ss in itertools.product(slices_i, slices_i)] for kern, slices_i in zip(self.kerns, slices)]
else: else:
slices2 = index_to_slices(X2[:,self.index_dim]) slices2 = index_to_slices(X2[:,self.index_dim])
target = np.zeros((X.shape[0], X2.shape[0])) target = np.zeros((X.shape[0], X2.shape[0]))
[[[np.copyto(target[s, s2], self.kern.K(X[s,:],X2[s2,:])) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)] [[target.__setitem__((s,s2), kern.K(X[s,:],X2[s2,:])) for s,s2 in itertools.product(slices_i, slices_j)] for kern, slices_i,slices_j in zip(self.kerns, slices,slices2)]
return target return target
def Kdiag(self,X): def Kdiag(self,X):
slices = index_to_slices(X[:,self.index_dim]) slices = index_to_slices(X[:,self.index_dim])
target = np.zeros(X.shape[0]) target = np.zeros(X.shape[0])
[[np.copyto(target[s], self.kern.Kdiag(X[s])) for s in slices_i] for slices_i in slices] [[np.copyto(target[s], kern.Kdiag(X[s])) for s in slices_i] for kern, slices_i in zip(self.kerns, slices)]
return target return target
def update_gradients_full(self,dL_dK,X,X2=None): def update_gradients_full(self,dL_dK,X,X2=None):
target = np.zeros(self.kern.size)
def collate_grads(dL, X, X2):
self.kern.update_gradients_full(dL,X,X2)
target[:] += self.kern.gradient
slices = index_to_slices(X[:,self.index_dim]) slices = index_to_slices(X[:,self.index_dim])
if self.single_kern: target = np.zeros(self.kern.size)
else: target = [np.zeros(kern.size) for kern, _ in zip(self.kerns, slices)]
def collate_grads(kern, i, dL, X, X2):
kern.update_gradients_full(dL,X,X2)
if self.single_kern: target[:] += kern.gradient
else: target[i][:] += kern.gradient
if X2 is None: if X2 is None:
[[collate_grads(dL_dK[s,ss], X[s], X[ss]) for s,ss in itertools.product(slices_i, slices_i)] for slices_i in slices] [[collate_grads(kern, i, dL_dK[s,ss], X[s], X[ss]) for s,ss in itertools.product(slices_i, slices_i)] for i,(kern,slices_i) in enumerate(zip(self.kerns,slices))]
else: else:
slices2 = index_to_slices(X2[:,self.index_dim]) slices2 = index_to_slices(X2[:,self.index_dim])
[[[collate_grads(dL_dK[s,s2],X[s],X2[s2]) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)] [[[collate_grads(kern, i, dL_dK[s,s2],X[s],X2[s2]) for s in slices_i] for s2 in slices_j] for i,(kern,slices_i,slices_j) in enumerate(zip(self.kerns,slices,slices2))]
self.kern.gradient = target if self.single_kern: kern.gradient = target
else:[kern.gradient.__setitem__(Ellipsis, target[i]) for i, [kern, _] in enumerate(zip(self.kerns, slices))]
def gradients_X(self,dL_dK, X, X2=None): def gradients_X(self,dL_dK, X, X2=None):
target = np.zeros(X.shape) target = np.zeros(X.shape)
slices = index_to_slices(X[:,self.index_dim])
if X2 is None: if X2 is None:
[[np.copyto(target[s,self.kern.active_dims], self.kern.gradients_X(dL_dK[s,ss],X[s],X[ss])) for s, ss in itertools.product(slices_i, slices_i)] for slices_i in slices] # TODO: make use of index_to_slices
values = np.unique(X[:,self.index_dim])
slices = [X[:,self.index_dim]==i for i in values]
[target.__setitem__(s, kern.gradients_X(dL_dK[s,s],X[s],None))
for kern, s in zip(self.kerns, slices)]
#slices = index_to_slices(X[:,self.index_dim])
#[[np.add(target[s], kern.gradients_X(dL_dK[s,s], X[s]), out=target[s])
# for s in slices_i] for kern, slices_i in zip(self.kerns, slices)]
#import ipdb;ipdb.set_trace()
#[[(np.add(target[s ], kern.gradients_X(dL_dK[s ,ss],X[s ], X[ss]), out=target[s ]),
# np.add(target[ss], kern.gradients_X(dL_dK[ss,s ],X[ss], X[s ]), out=target[ss]))
# for s, ss in itertools.combinations(slices_i, 2)] for kern, slices_i in zip(self.kerns, slices)]
else: else:
slices2 = index_to_slices(X2[:,self.index_dim]) values = np.unique(X[:,self.index_dim])
[[[np.copyto(target[s,self.kern.active_dims], self.kern.gradients_X(dL_dK[s,s2], X[s], X2[s2])) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)] slices = [X[:,self.index_dim]==i for i in values]
slices2 = [X2[:,self.index_dim]==i for i in values]
[target.__setitem__(s, kern.gradients_X(dL_dK[s, :][:, s2],X[s],X2[s2]))
for kern, s, s2 in zip(self.kerns, slices, slices2)]
# TODO: make work with index_to_slices
#slices = index_to_slices(X[:,self.index_dim])
#slices2 = index_to_slices(X2[:,self.index_dim])
#[[target.__setitem__(s, target[s] + kern.gradients_X(dL_dK[s,s2], X[s], X2[s2])) for s, s2 in itertools.product(slices_i, slices_j)] for kern, slices_i,slices_j in zip(self.kerns, slices,slices2)]
return target return target
def gradients_X_diag(self, dL_dKdiag, X): def gradients_X_diag(self, dL_dKdiag, X):
slices = index_to_slices(X[:,self.index_dim]) slices = index_to_slices(X[:,self.index_dim])
target = np.zeros(X.shape) target = np.zeros(X.shape)
[[np.copyto(target[s,self.kern.active_dims], self.kern.gradients_X_diag(dL_dKdiag[s],X[s])) for s in slices_i] for slices_i in slices] [[target.__setitem__(s, kern.gradients_X_diag(dL_dKdiag[s],X[s])) for s in slices_i] for kern, slices_i in zip(self.kerns, slices)]
return target return target
def update_gradients_diag(self, dL_dKdiag, X): def update_gradients_diag(self, dL_dKdiag, X):
target = np.zeros(self.kern.size)
def collate_grads(dL, X):
self.kern.update_gradients_diag(dL,X)
target[:] += self.kern.gradient
slices = index_to_slices(X[:,self.index_dim]) slices = index_to_slices(X[:,self.index_dim])
[[collate_grads(dL_dKdiag[s], X[s,:]) for s in slices_i] for slices_i in slices] if self.single_kern: target = np.zeros(self.kern.size)
self.kern.gradient = target else: target = [np.zeros(kern.size) for kern, _ in zip(self.kerns, slices)]
def collate_grads(kern, i, dL, X):
kern.update_gradients_diag(dL,X)
if self.single_kern: target[:] += kern.gradient
else: target[i][:] += kern.gradient
[[collate_grads(kern, i, dL_dKdiag[s], X[s,:]) for s in slices_i] for i, (kern, slices_i) in enumerate(zip(self.kerns, slices))]
if self.single_kern: kern.gradient = target
else:[kern.gradient.__setitem__(Ellipsis, target[i]) for i, [kern, _] in enumerate(zip(self.kerns, slices))]
class Hierarchical(Kern): class Hierarchical(CombinationKernel):
""" """
A kernel which can reopresent a simple hierarchical model. A kernel which can reopresent a simple hierarchical model.
@ -115,7 +145,7 @@ class Hierarchical(Kern):
The index of the functions is given by additional columns in the input X. The index of the functions is given by additional columns in the input X.
""" """
def __init__(self, kerns, name='hierarchy'): def __init__(self, kern, name='hierarchy'):
assert all([k.input_dim==kerns[0].input_dim for k in kerns]) assert all([k.input_dim==kerns[0].input_dim for k in kerns])
super(Hierarchical, self).__init__(kerns[0].input_dim + len(kerns) - 1, name) super(Hierarchical, self).__init__(kerns[0].input_dim + len(kerns) - 1, name)
self.kerns = kerns self.kerns = kerns

View file

@ -94,7 +94,7 @@ class Kern_check_dKdiag_dX(Kern_check_dK_dX):
def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verbose=False): def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verbose=False, fixed_X_dims=None):
""" """
This function runs on kernels to check the correctness of their This function runs on kernels to check the correctness of their
implementation. It checks that the covariance function is positive definite implementation. It checks that the covariance function is positive definite
@ -109,11 +109,11 @@ def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verb
""" """
pass_checks = True pass_checks = True
if X==None: if X is None:
X = np.random.randn(10, kern.input_dim) X = np.random.randn(10, kern.input_dim)
if output_ind is not None: if output_ind is not None:
X[:, output_ind] = np.random.randint(kern.output_dim, X.shape[0]) X[:, output_ind] = np.random.randint(kern.output_dim, X.shape[0])
if X2==None: if X2 is None:
X2 = np.random.randn(20, kern.input_dim) X2 = np.random.randn(20, kern.input_dim)
if output_ind is not None: if output_ind is not None:
X2[:, output_ind] = np.random.randint(kern.output_dim, X2.shape[0]) X2[:, output_ind] = np.random.randint(kern.output_dim, X2.shape[0])
@ -164,7 +164,10 @@ def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verb
if verbose: if verbose:
print("Checking gradients of K(X, X) wrt X.") print("Checking gradients of K(X, X) wrt X.")
try: try:
result = Kern_check_dK_dX(kern, X=X, X2=None).checkgrad(verbose=verbose) testmodel = Kern_check_dK_dX(kern, X=X, X2=None)
if fixed_X_dims is not None:
testmodel.X[:,fixed_X_dims].fix()
result = testmodel.checkgrad(verbose=verbose)
except NotImplementedError: except NotImplementedError:
result=True result=True
if verbose: if verbose:
@ -173,14 +176,17 @@ def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verb
print("Check passed.") print("Check passed.")
if not result: if not result:
print("Gradient of K(X, X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:") print("Gradient of K(X, X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:")
Kern_check_dK_dX(kern, X=X, X2=None).checkgrad(verbose=True) testmodel.checkgrad(verbose=True)
pass_checks = False pass_checks = False
return False return False
if verbose: if verbose:
print("Checking gradients of K(X, X2) wrt X.") print("Checking gradients of K(X, X2) wrt X.")
try: try:
result = Kern_check_dK_dX(kern, X=X, X2=X2).checkgrad(verbose=verbose) testmodel = Kern_check_dK_dX(kern, X=X, X2=X2)
if fixed_X_dims is not None:
testmodel.X[:,fixed_X_dims].fix()
result = testmodel.checkgrad(verbose=verbose)
except NotImplementedError: except NotImplementedError:
result=True result=True
if verbose: if verbose:
@ -188,8 +194,8 @@ def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verb
if result and verbose: if result and verbose:
print("Check passed.") print("Check passed.")
if not result: if not result:
print("Gradient of K(X, X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:") print("Gradient of K(X, X2) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:")
Kern_check_dK_dX(kern, X=X, X2=X2).checkgrad(verbose=True) testmodel.checkgrad(verbose=True)
pass_checks = False pass_checks = False
return False return False
@ -300,24 +306,50 @@ class KernelTestsMiscellaneous(unittest.TestCase):
class KernelTestsNonContinuous(unittest.TestCase): class KernelTestsNonContinuous(unittest.TestCase):
def setUp(self): def setUp(self):
N = 100 N0 = 3
N1 = 110 N1 = 9
self.D = 2 N2 = 4
D = self.D N = N0+N1+N2
self.X = np.random.randn(N,D) self.D = 3
self.X2 = np.random.randn(N1,D) self.X = np.random.randn(N, self.D+1)
self.X_block = np.zeros((N+N1, D+D+1)) indices = np.random.random_integers(0, 2, size=N)
self.X_block[0:N, 0:D] = self.X self.X[indices==0, -1] = 0
self.X_block[N:N+N1, D:D+D] = self.X2 self.X[indices==1, -1] = 1
self.X_block[0:N, -1] = 1 self.X[indices==2, -1] = 2
self.X_block[N:N+1, -1] = 2 #self.X = self.X[self.X[:, -1].argsort(), :]
self.X_block = self.X_block[self.X_block.argsort(0)[:, -1], :] self.X2 = np.random.randn((N0+N1)*2, self.D+1)
self.X2[:(N0*2), -1] = 0
self.X2[(N0*2):, -1] = 1
def test_IndependentOutputs(self): def test_IndependentOutputs(self):
k = GPy.kern.RBF(self.D) k = GPy.kern.RBF(self.D)
kern = GPy.kern.IndependentOutputs(k, -1) kern = GPy.kern.IndependentOutputs(k, -1, 'ind_single')
self.assertTrue(check_kernel_gradient_functions(kern, X=self.X_block, X2=self.X_block, verbose=verbose)) self.assertTrue(check_kernel_gradient_functions(kern, X=self.X, X2=self.X2, verbose=verbose, fixed_X_dims=-1))
k = [GPy.kern.RBF(1, active_dims=[1], name='rbf1'), GPy.kern.RBF(self.D, name='rbf012'), GPy.kern.RBF(2, active_dims=[0,2], name='rbf02')]
kern = GPy.kern.IndependentOutputs(k, -1, name='ind_split')
self.assertTrue(check_kernel_gradient_functions(kern, X=self.X, X2=self.X2, verbose=verbose, fixed_X_dims=-1))
if __name__ == "__main__": if __name__ == "__main__":
print "Running unit tests, please be (very) patient..." print "Running unit tests, please be (very) patient..."
unittest.main() #unittest.main()
np.random.seed(0)
N0 = 3
N1 = 9
N2 = 4
N = N0+N1+N2
D = 3
X = np.random.randn(N, D+1)
indices = np.random.random_integers(0, 2, size=N)
X[indices==0, -1] = 0
X[indices==1, -1] = 1
X[indices==2, -1] = 2
#X = X[X[:, -1].argsort(), :]
X2 = np.random.randn((N0+N1)*2, D+1)
X2[:(N0*2), -1] = 0
X2[(N0*2):, -1] = 1
k = [GPy.kern.RBF(1, active_dims=[1], name='rbf1'), GPy.kern.RBF(D, name='rbf012'), GPy.kern.RBF(2, active_dims=[0,2], name='rbf02')]
kern = GPy.kern.IndependentOutputs(k, -1, name='ind_split')
assert(check_kernel_gradient_functions(kern, X=X, X2=X2, verbose=verbose, fixed_X_dims=-1))
k = GPy.kern.RBF(D)
kern = GPy.kern.IndependentOutputs(k, -1, 'ind_single')
assert(check_kernel_gradient_functions(kern, X=X, X2=X2, verbose=verbose, fixed_X_dims=-1))