From 19dc7cecf45dba1a617e634531372bd899266bd2 Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Mon, 17 Mar 2014 16:22:16 +0000 Subject: [PATCH] slicing finished with independent outputs --- GPy/kern/_src/add.py | 1 - GPy/kern/_src/independent_outputs.py | 88 +++++++++++++++++++--------- GPy/testing/kernel_tests.py | 80 +++++++++++++++++-------- 3 files changed, 115 insertions(+), 54 deletions(-) diff --git a/GPy/kern/_src/add.py b/GPy/kern/_src/add.py index ca1f4533..d1fd7cb8 100644 --- a/GPy/kern/_src/add.py +++ b/GPy/kern/_src/add.py @@ -64,7 +64,6 @@ class Add(CombinationKernel): def gradients_X_diag(self, dL_dKdiag, X): target = np.zeros(X.shape) [target.__iadd__(p.gradients_X_diag(dL_dKdiag, X)) for p in self.parts] - #[target.__setitem__([Ellipsis, p.active_dims], target[:, p.active_dims]+p.gradients_X(dL_dK, X, X2)) for p in self.parts] return target def psi0(self, Z, variational_posterior): diff --git a/GPy/kern/_src/independent_outputs.py b/GPy/kern/_src/independent_outputs.py index 1848bf6a..cf015d02 100644 --- a/GPy/kern/_src/independent_outputs.py +++ b/GPy/kern/_src/independent_outputs.py @@ -39,72 +39,102 @@ class IndependentOutputs(CombinationKernel): The index of the functions is given by the last column in the input X the rest of the columns of X are passed to the underlying kernel for computation (in blocks). - + + :param kernels: either a kernel, or list of kernels to work with. If it is a list of kernels + the indices in the index_dim, index the kernels you gave! """ - def __init__(self, kern, index_dim=-1, name='independ'): + def __init__(self, kernels, index_dim=-1, name='independ'): assert isinstance(index_dim, int), "IndependentOutputs kernel is only defined with one input dimension being the indeces" - super(IndependentOutputs, self).__init__(kernels=[kern], extra_dims=[index_dim], name=name) + if not isinstance(kernels, list): + self.single_kern = True + self.kern = kernels + kernels = [kernels] + else: + self.single_kern = False + self.kern = kernels + super(IndependentOutputs, self).__init__(kernels=kernels, extra_dims=[index_dim], name=name) self.index_dim = index_dim - self.kern = kern - #self.add_parameters(self.kern) + self.kerns = kernels if len(kernels) != 1 else itertools.repeat(kernels[0]) def K(self,X ,X2=None): slices = index_to_slices(X[:,self.index_dim]) if X2 is None: target = np.zeros((X.shape[0], X.shape[0])) - [[np.copyto(target[s,ss], self.kern.K(X[s,:], X[ss,:])) for s,ss in itertools.product(slices_i, slices_i)] for slices_i in slices] + [[target.__setitem__((s,ss), kern.K(X[s,:], X[ss,:])) for s,ss in itertools.product(slices_i, slices_i)] for kern, slices_i in zip(self.kerns, slices)] else: slices2 = index_to_slices(X2[:,self.index_dim]) target = np.zeros((X.shape[0], X2.shape[0])) - [[[np.copyto(target[s, s2], self.kern.K(X[s,:],X2[s2,:])) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)] + [[target.__setitem__((s,s2), kern.K(X[s,:],X2[s2,:])) for s,s2 in itertools.product(slices_i, slices_j)] for kern, slices_i,slices_j in zip(self.kerns, slices,slices2)] return target def Kdiag(self,X): slices = index_to_slices(X[:,self.index_dim]) target = np.zeros(X.shape[0]) - [[np.copyto(target[s], self.kern.Kdiag(X[s])) for s in slices_i] for slices_i in slices] + [[np.copyto(target[s], kern.Kdiag(X[s])) for s in slices_i] for kern, slices_i in zip(self.kerns, slices)] return target def update_gradients_full(self,dL_dK,X,X2=None): - target = np.zeros(self.kern.size) - def collate_grads(dL, X, X2): - self.kern.update_gradients_full(dL,X,X2) - target[:] += self.kern.gradient - slices = index_to_slices(X[:,self.index_dim]) + if self.single_kern: target = np.zeros(self.kern.size) + else: target = [np.zeros(kern.size) for kern, _ in zip(self.kerns, slices)] + def collate_grads(kern, i, dL, X, X2): + kern.update_gradients_full(dL,X,X2) + if self.single_kern: target[:] += kern.gradient + else: target[i][:] += kern.gradient if X2 is None: - [[collate_grads(dL_dK[s,ss], X[s], X[ss]) for s,ss in itertools.product(slices_i, slices_i)] for slices_i in slices] + [[collate_grads(kern, i, dL_dK[s,ss], X[s], X[ss]) for s,ss in itertools.product(slices_i, slices_i)] for i,(kern,slices_i) in enumerate(zip(self.kerns,slices))] else: slices2 = index_to_slices(X2[:,self.index_dim]) - [[[collate_grads(dL_dK[s,s2],X[s],X2[s2]) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)] - self.kern.gradient = target + [[[collate_grads(kern, i, dL_dK[s,s2],X[s],X2[s2]) for s in slices_i] for s2 in slices_j] for i,(kern,slices_i,slices_j) in enumerate(zip(self.kerns,slices,slices2))] + if self.single_kern: kern.gradient = target + else:[kern.gradient.__setitem__(Ellipsis, target[i]) for i, [kern, _] in enumerate(zip(self.kerns, slices))] def gradients_X(self,dL_dK, X, X2=None): target = np.zeros(X.shape) - slices = index_to_slices(X[:,self.index_dim]) if X2 is None: - [[np.copyto(target[s,self.kern.active_dims], self.kern.gradients_X(dL_dK[s,ss],X[s],X[ss])) for s, ss in itertools.product(slices_i, slices_i)] for slices_i in slices] + # TODO: make use of index_to_slices + values = np.unique(X[:,self.index_dim]) + slices = [X[:,self.index_dim]==i for i in values] + [target.__setitem__(s, kern.gradients_X(dL_dK[s,s],X[s],None)) + for kern, s in zip(self.kerns, slices)] + #slices = index_to_slices(X[:,self.index_dim]) + #[[np.add(target[s], kern.gradients_X(dL_dK[s,s], X[s]), out=target[s]) + # for s in slices_i] for kern, slices_i in zip(self.kerns, slices)] + #import ipdb;ipdb.set_trace() + #[[(np.add(target[s ], kern.gradients_X(dL_dK[s ,ss],X[s ], X[ss]), out=target[s ]), + # np.add(target[ss], kern.gradients_X(dL_dK[ss,s ],X[ss], X[s ]), out=target[ss])) + # for s, ss in itertools.combinations(slices_i, 2)] for kern, slices_i in zip(self.kerns, slices)] else: - slices2 = index_to_slices(X2[:,self.index_dim]) - [[[np.copyto(target[s,self.kern.active_dims], self.kern.gradients_X(dL_dK[s,s2], X[s], X2[s2])) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)] + values = np.unique(X[:,self.index_dim]) + slices = [X[:,self.index_dim]==i for i in values] + slices2 = [X2[:,self.index_dim]==i for i in values] + [target.__setitem__(s, kern.gradients_X(dL_dK[s, :][:, s2],X[s],X2[s2])) + for kern, s, s2 in zip(self.kerns, slices, slices2)] + # TODO: make work with index_to_slices + #slices = index_to_slices(X[:,self.index_dim]) + #slices2 = index_to_slices(X2[:,self.index_dim]) + #[[target.__setitem__(s, target[s] + kern.gradients_X(dL_dK[s,s2], X[s], X2[s2])) for s, s2 in itertools.product(slices_i, slices_j)] for kern, slices_i,slices_j in zip(self.kerns, slices,slices2)] return target def gradients_X_diag(self, dL_dKdiag, X): slices = index_to_slices(X[:,self.index_dim]) target = np.zeros(X.shape) - [[np.copyto(target[s,self.kern.active_dims], self.kern.gradients_X_diag(dL_dKdiag[s],X[s])) for s in slices_i] for slices_i in slices] + [[target.__setitem__(s, kern.gradients_X_diag(dL_dKdiag[s],X[s])) for s in slices_i] for kern, slices_i in zip(self.kerns, slices)] return target def update_gradients_diag(self, dL_dKdiag, X): - target = np.zeros(self.kern.size) - def collate_grads(dL, X): - self.kern.update_gradients_diag(dL,X) - target[:] += self.kern.gradient slices = index_to_slices(X[:,self.index_dim]) - [[collate_grads(dL_dKdiag[s], X[s,:]) for s in slices_i] for slices_i in slices] - self.kern.gradient = target + if self.single_kern: target = np.zeros(self.kern.size) + else: target = [np.zeros(kern.size) for kern, _ in zip(self.kerns, slices)] + def collate_grads(kern, i, dL, X): + kern.update_gradients_diag(dL,X) + if self.single_kern: target[:] += kern.gradient + else: target[i][:] += kern.gradient + [[collate_grads(kern, i, dL_dKdiag[s], X[s,:]) for s in slices_i] for i, (kern, slices_i) in enumerate(zip(self.kerns, slices))] + if self.single_kern: kern.gradient = target + else:[kern.gradient.__setitem__(Ellipsis, target[i]) for i, [kern, _] in enumerate(zip(self.kerns, slices))] -class Hierarchical(Kern): +class Hierarchical(CombinationKernel): """ A kernel which can reopresent a simple hierarchical model. @@ -115,7 +145,7 @@ class Hierarchical(Kern): The index of the functions is given by additional columns in the input X. """ - def __init__(self, kerns, name='hierarchy'): + def __init__(self, kern, name='hierarchy'): assert all([k.input_dim==kerns[0].input_dim for k in kerns]) super(Hierarchical, self).__init__(kerns[0].input_dim + len(kerns) - 1, name) self.kerns = kerns diff --git a/GPy/testing/kernel_tests.py b/GPy/testing/kernel_tests.py index b057f8ef..b45d9919 100644 --- a/GPy/testing/kernel_tests.py +++ b/GPy/testing/kernel_tests.py @@ -94,7 +94,7 @@ class Kern_check_dKdiag_dX(Kern_check_dK_dX): -def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verbose=False): +def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verbose=False, fixed_X_dims=None): """ This function runs on kernels to check the correctness of their implementation. It checks that the covariance function is positive definite @@ -109,11 +109,11 @@ def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verb """ pass_checks = True - if X==None: + if X is None: X = np.random.randn(10, kern.input_dim) if output_ind is not None: X[:, output_ind] = np.random.randint(kern.output_dim, X.shape[0]) - if X2==None: + if X2 is None: X2 = np.random.randn(20, kern.input_dim) if output_ind is not None: X2[:, output_ind] = np.random.randint(kern.output_dim, X2.shape[0]) @@ -164,7 +164,10 @@ def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verb if verbose: print("Checking gradients of K(X, X) wrt X.") try: - result = Kern_check_dK_dX(kern, X=X, X2=None).checkgrad(verbose=verbose) + testmodel = Kern_check_dK_dX(kern, X=X, X2=None) + if fixed_X_dims is not None: + testmodel.X[:,fixed_X_dims].fix() + result = testmodel.checkgrad(verbose=verbose) except NotImplementedError: result=True if verbose: @@ -173,14 +176,17 @@ def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verb print("Check passed.") if not result: print("Gradient of K(X, X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:") - Kern_check_dK_dX(kern, X=X, X2=None).checkgrad(verbose=True) + testmodel.checkgrad(verbose=True) pass_checks = False return False if verbose: print("Checking gradients of K(X, X2) wrt X.") try: - result = Kern_check_dK_dX(kern, X=X, X2=X2).checkgrad(verbose=verbose) + testmodel = Kern_check_dK_dX(kern, X=X, X2=X2) + if fixed_X_dims is not None: + testmodel.X[:,fixed_X_dims].fix() + result = testmodel.checkgrad(verbose=verbose) except NotImplementedError: result=True if verbose: @@ -188,8 +194,8 @@ def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verb if result and verbose: print("Check passed.") if not result: - print("Gradient of K(X, X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:") - Kern_check_dK_dX(kern, X=X, X2=X2).checkgrad(verbose=True) + print("Gradient of K(X, X2) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:") + testmodel.checkgrad(verbose=True) pass_checks = False return False @@ -300,24 +306,50 @@ class KernelTestsMiscellaneous(unittest.TestCase): class KernelTestsNonContinuous(unittest.TestCase): def setUp(self): - N = 100 - N1 = 110 - self.D = 2 - D = self.D - self.X = np.random.randn(N,D) - self.X2 = np.random.randn(N1,D) - self.X_block = np.zeros((N+N1, D+D+1)) - self.X_block[0:N, 0:D] = self.X - self.X_block[N:N+N1, D:D+D] = self.X2 - self.X_block[0:N, -1] = 1 - self.X_block[N:N+1, -1] = 2 - self.X_block = self.X_block[self.X_block.argsort(0)[:, -1], :] - + N0 = 3 + N1 = 9 + N2 = 4 + N = N0+N1+N2 + self.D = 3 + self.X = np.random.randn(N, self.D+1) + indices = np.random.random_integers(0, 2, size=N) + self.X[indices==0, -1] = 0 + self.X[indices==1, -1] = 1 + self.X[indices==2, -1] = 2 + #self.X = self.X[self.X[:, -1].argsort(), :] + self.X2 = np.random.randn((N0+N1)*2, self.D+1) + self.X2[:(N0*2), -1] = 0 + self.X2[(N0*2):, -1] = 1 + def test_IndependentOutputs(self): k = GPy.kern.RBF(self.D) - kern = GPy.kern.IndependentOutputs(k, -1) - self.assertTrue(check_kernel_gradient_functions(kern, X=self.X_block, X2=self.X_block, verbose=verbose)) + kern = GPy.kern.IndependentOutputs(k, -1, 'ind_single') + self.assertTrue(check_kernel_gradient_functions(kern, X=self.X, X2=self.X2, verbose=verbose, fixed_X_dims=-1)) + k = [GPy.kern.RBF(1, active_dims=[1], name='rbf1'), GPy.kern.RBF(self.D, name='rbf012'), GPy.kern.RBF(2, active_dims=[0,2], name='rbf02')] + kern = GPy.kern.IndependentOutputs(k, -1, name='ind_split') + self.assertTrue(check_kernel_gradient_functions(kern, X=self.X, X2=self.X2, verbose=verbose, fixed_X_dims=-1)) if __name__ == "__main__": print "Running unit tests, please be (very) patient..." - unittest.main() + #unittest.main() + np.random.seed(0) + N0 = 3 + N1 = 9 + N2 = 4 + N = N0+N1+N2 + D = 3 + X = np.random.randn(N, D+1) + indices = np.random.random_integers(0, 2, size=N) + X[indices==0, -1] = 0 + X[indices==1, -1] = 1 + X[indices==2, -1] = 2 + #X = X[X[:, -1].argsort(), :] + X2 = np.random.randn((N0+N1)*2, D+1) + X2[:(N0*2), -1] = 0 + X2[(N0*2):, -1] = 1 + k = [GPy.kern.RBF(1, active_dims=[1], name='rbf1'), GPy.kern.RBF(D, name='rbf012'), GPy.kern.RBF(2, active_dims=[0,2], name='rbf02')] + kern = GPy.kern.IndependentOutputs(k, -1, name='ind_split') + assert(check_kernel_gradient_functions(kern, X=X, X2=X2, verbose=verbose, fixed_X_dims=-1)) + k = GPy.kern.RBF(D) + kern = GPy.kern.IndependentOutputs(k, -1, 'ind_single') + assert(check_kernel_gradient_functions(kern, X=X, X2=X2, verbose=verbose, fixed_X_dims=-1))