From 62d594d9776f013b8900bb541adc051aaf1facd2 Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Mon, 17 Mar 2014 15:43:09 +0000 Subject: [PATCH 1/5] slicing now returns the right shape, when computing derivative wrt X or Z --- GPy/kern/_src/add.py | 14 +++++--- GPy/kern/_src/kernel_slice_operations.py | 46 ++++++++++++++++++------ GPy/kern/_src/prod.py | 8 ++--- 3 files changed, 50 insertions(+), 18 deletions(-) diff --git a/GPy/kern/_src/add.py b/GPy/kern/_src/add.py index 97afd1f0..ca1f4533 100644 --- a/GPy/kern/_src/add.py +++ b/GPy/kern/_src/add.py @@ -58,7 +58,13 @@ class Add(CombinationKernel): :type X2: np.ndarray (num_inducing x input_dim)""" target = np.zeros(X.shape) - [target.__setitem__([Ellipsis, p.active_dims], target[:, p.active_dims]+p.gradients_X(dL_dK, X, X2)) for p in self.parts] + [target.__iadd__(p.gradients_X(dL_dK, X, X2)) for p in self.parts] + return target + + def gradients_X_diag(self, dL_dKdiag, X): + target = np.zeros(X.shape) + [target.__iadd__(p.gradients_X_diag(dL_dKdiag, X)) for p in self.parts] + #[target.__setitem__([Ellipsis, p.active_dims], target[:, p.active_dims]+p.gradients_X(dL_dK, X, X2)) for p in self.parts] return target def psi0(self, Z, variational_posterior): @@ -131,7 +137,7 @@ class Add(CombinationKernel): eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2. else: eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z, variational_posterior) * 2. - target[:, p1.active_dims] += p1.gradients_Z_expectations(eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior) + target += p1.gradients_Z_expectations(eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior) return target def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): @@ -151,8 +157,8 @@ class Add(CombinationKernel): else: eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z, variational_posterior) * 2. 
        a, b = p1.gradients_qX_expectations(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
-        target_mu[:, p1.active_dims] += a
-        target_S[:, p1.active_dims] += b
+        target_mu += a
+        target_S += b
         return target_mu, target_S

     def _getstate(self):
diff --git a/GPy/kern/_src/kernel_slice_operations.py b/GPy/kern/_src/kernel_slice_operations.py
index ff33cc24..c355ccad 100644
--- a/GPy/kern/_src/kernel_slice_operations.py
+++ b/GPy/kern/_src/kernel_slice_operations.py
@@ -4,6 +4,7 @@ Created on 11 Mar 2014
 @author: maxz
 '''
 from ...core.parameterization.parameterized import ParametersChangedMeta
+import numpy as np

 class KernCallsViaSlicerMeta(ParametersChangedMeta):
     def __call__(self, *args, **kw):
@@ -12,18 +13,18 @@ class KernCallsViaSlicerMeta(ParametersChangedMeta):
         instance.Kdiag = _slice_wrapper(instance, instance.Kdiag, diag=True)
         instance.update_gradients_full = _slice_wrapper(instance, instance.update_gradients_full, diag=False, derivative=True)
         instance.update_gradients_diag = _slice_wrapper(instance, instance.update_gradients_diag, diag=True, derivative=True)
-        instance.gradients_X = _slice_wrapper(instance, instance.gradients_X, diag=False, derivative=True)
-        instance.gradients_X_diag = _slice_wrapper(instance, instance.gradients_X_diag, diag=True, derivative=True)
+        instance.gradients_X = _slice_wrapper(instance, instance.gradients_X, diag=False, derivative=True, ret_X=True)
+        instance.gradients_X_diag = _slice_wrapper(instance, instance.gradients_X_diag, diag=True, derivative=True, ret_X=True)
         instance.psi0 = _slice_wrapper(instance, instance.psi0, diag=False, derivative=False)
         instance.psi1 = _slice_wrapper(instance, instance.psi1, diag=False, derivative=False)
         instance.psi2 = _slice_wrapper(instance, instance.psi2, diag=False, derivative=False)
         instance.update_gradients_expectations = _slice_wrapper(instance, instance.update_gradients_expectations, derivative=True, psi_stat=True)
-        instance.gradients_Z_expectations = _slice_wrapper(instance, instance.gradients_Z_expectations, derivative=True, psi_stat_Z=True)
-        instance.gradients_qX_expectations = _slice_wrapper(instance, instance.gradients_qX_expectations, derivative=True, psi_stat=True)
+        instance.gradients_Z_expectations = _slice_wrapper(instance, instance.gradients_Z_expectations, derivative=True, psi_stat_Z=True, ret_X=True)
+        instance.gradients_qX_expectations = _slice_wrapper(instance, instance.gradients_qX_expectations, derivative=True, psi_stat=True, ret_X=True)
         instance.parameters_changed()
         return instance

-def _slice_wrapper(kern, operation, diag=False, derivative=False, psi_stat=False, psi_stat_Z=False):
+def _slice_wrapper(kern, operation, diag=False, derivative=False, psi_stat=False, psi_stat_Z=False, ret_X=False):
     """
     This method wraps the functions in the kernel to make sure all kernels always see their respective input dimensions.
The different switches are: @@ -34,11 +35,16 @@ def _slice_wrapper(kern, operation, diag=False, derivative=False, psi_stat=False """ if derivative: if diag: - def x_slice_wrapper(dL_dK, X): + def x_slice_wrapper(dL_dKdiag, X): + ret_X_not_sliced = ret_X and kern._sliced_X == 0 + if ret_X_not_sliced: + ret = np.zeros(X.shape) X = kern._slice_X(X) if not kern._sliced_X else X + # if the return value is of shape X.shape, we need to make sure to return the right shape kern._sliced_X += 1 try: - ret = operation(dL_dK, X) + if ret_X_not_sliced: ret[:, kern.active_dims] = operation(dL_dKdiag, X) + else: ret = operation(dL_dKdiag, X) except: raise finally: @@ -46,10 +52,22 @@ def _slice_wrapper(kern, operation, diag=False, derivative=False, psi_stat=False return ret elif psi_stat: def x_slice_wrapper(dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): + ret_X_not_sliced = ret_X and kern._sliced_X == 0 + if ret_X_not_sliced: + ret1, ret2 = np.zeros(variational_posterior.shape), np.zeros(variational_posterior.shape) Z, variational_posterior = kern._slice_X(Z) if not kern._sliced_X else Z, kern._slice_X(variational_posterior) if not kern._sliced_X else variational_posterior kern._sliced_X += 1 + # if the return value is of shape X.shape, we need to make sure to return the right shape try: - ret = operation(dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior) + if ret_X_not_sliced: + ret = list(operation(dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior)) + r2 = ret[:2] + ret[0] = ret1 + ret[1] = ret2 + ret[0][:, kern.active_dims] = r2[0] + ret[1][:, kern.active_dims] = r2[1] + del r2 + else: ret = operation(dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior) except: raise finally: @@ -57,10 +75,14 @@ def _slice_wrapper(kern, operation, diag=False, derivative=False, psi_stat=False return ret elif psi_stat_Z: def x_slice_wrapper(dL_dpsi1, dL_dpsi2, Z, variational_posterior): + ret_X_not_sliced = ret_X and kern._sliced_X == 0 + if ret_X_not_sliced: ret = np.zeros(Z.shape) Z, variational_posterior = kern._slice_X(Z) if not kern._sliced_X else Z, kern._slice_X(variational_posterior) if not kern._sliced_X else variational_posterior kern._sliced_X += 1 try: - ret = operation(dL_dpsi1, dL_dpsi2, Z, variational_posterior) + if ret_X_not_sliced: + ret[:, kern.active_dims] = operation(dL_dpsi1, dL_dpsi2, Z, variational_posterior) + else: ret = operation(dL_dpsi1, dL_dpsi2, Z, variational_posterior) except: raise finally: @@ -68,10 +90,14 @@ def _slice_wrapper(kern, operation, diag=False, derivative=False, psi_stat=False return ret else: def x_slice_wrapper(dL_dK, X, X2=None): + ret_X_not_sliced = ret_X and kern._sliced_X == 0 + if ret_X_not_sliced: + ret = np.zeros(X.shape) X, X2 = kern._slice_X(X) if not kern._sliced_X else X, kern._slice_X(X2) if X2 is not None and not kern._sliced_X else X2 kern._sliced_X += 1 try: - ret = operation(dL_dK, X, X2) + if ret_X_not_sliced: ret[:, kern.active_dims] = operation(dL_dK, X, X2) + else: ret = operation(dL_dK, X, X2) except: raise finally: diff --git a/GPy/kern/_src/prod.py b/GPy/kern/_src/prod.py index f3b2b50f..e00f38c3 100644 --- a/GPy/kern/_src/prod.py +++ b/GPy/kern/_src/prod.py @@ -51,15 +51,15 @@ class Prod(CombinationKernel): def gradients_X(self, dL_dK, X, X2=None): target = np.zeros(X.shape) for k1,k2 in itertools.combinations(self.parts, 2): - target[:,k1.active_dims] += k1.gradients_X(dL_dK*k2.K(X, X2), X, X2) - target[:,k2.active_dims] += k2.gradients_X(dL_dK*k1.K(X, X2), X, X2) + target += k1.gradients_X(dL_dK*k2.K(X, X2), X, X2) + target 
+= k2.gradients_X(dL_dK*k1.K(X, X2), X, X2) return target def gradients_X_diag(self, dL_dKdiag, X): target = np.zeros(X.shape) for k1,k2 in itertools.combinations(self.parts, 2): - target[:,k1.active_dims] += k1.gradients_X(dL_dKdiag*k2.Kdiag(X), X) - target[:,k2.active_dims] += k2.gradients_X(dL_dKdiag*k1.Kdiag(X), X) + target += k1.gradients_X(dL_dKdiag*k2.Kdiag(X), X) + target += k2.gradients_X(dL_dKdiag*k1.Kdiag(X), X) return target From 19dc7cecf45dba1a617e634531372bd899266bd2 Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Mon, 17 Mar 2014 16:22:16 +0000 Subject: [PATCH 2/5] slicing finished with independent outputs --- GPy/kern/_src/add.py | 1 - GPy/kern/_src/independent_outputs.py | 88 +++++++++++++++++++--------- GPy/testing/kernel_tests.py | 80 +++++++++++++++++-------- 3 files changed, 115 insertions(+), 54 deletions(-) diff --git a/GPy/kern/_src/add.py b/GPy/kern/_src/add.py index ca1f4533..d1fd7cb8 100644 --- a/GPy/kern/_src/add.py +++ b/GPy/kern/_src/add.py @@ -64,7 +64,6 @@ class Add(CombinationKernel): def gradients_X_diag(self, dL_dKdiag, X): target = np.zeros(X.shape) [target.__iadd__(p.gradients_X_diag(dL_dKdiag, X)) for p in self.parts] - #[target.__setitem__([Ellipsis, p.active_dims], target[:, p.active_dims]+p.gradients_X(dL_dK, X, X2)) for p in self.parts] return target def psi0(self, Z, variational_posterior): diff --git a/GPy/kern/_src/independent_outputs.py b/GPy/kern/_src/independent_outputs.py index 1848bf6a..cf015d02 100644 --- a/GPy/kern/_src/independent_outputs.py +++ b/GPy/kern/_src/independent_outputs.py @@ -39,72 +39,102 @@ class IndependentOutputs(CombinationKernel): The index of the functions is given by the last column in the input X the rest of the columns of X are passed to the underlying kernel for computation (in blocks). - + + :param kernels: either a kernel, or list of kernels to work with. If it is a list of kernels + the indices in the index_dim, index the kernels you gave! 
""" - def __init__(self, kern, index_dim=-1, name='independ'): + def __init__(self, kernels, index_dim=-1, name='independ'): assert isinstance(index_dim, int), "IndependentOutputs kernel is only defined with one input dimension being the indeces" - super(IndependentOutputs, self).__init__(kernels=[kern], extra_dims=[index_dim], name=name) + if not isinstance(kernels, list): + self.single_kern = True + self.kern = kernels + kernels = [kernels] + else: + self.single_kern = False + self.kern = kernels + super(IndependentOutputs, self).__init__(kernels=kernels, extra_dims=[index_dim], name=name) self.index_dim = index_dim - self.kern = kern - #self.add_parameters(self.kern) + self.kerns = kernels if len(kernels) != 1 else itertools.repeat(kernels[0]) def K(self,X ,X2=None): slices = index_to_slices(X[:,self.index_dim]) if X2 is None: target = np.zeros((X.shape[0], X.shape[0])) - [[np.copyto(target[s,ss], self.kern.K(X[s,:], X[ss,:])) for s,ss in itertools.product(slices_i, slices_i)] for slices_i in slices] + [[target.__setitem__((s,ss), kern.K(X[s,:], X[ss,:])) for s,ss in itertools.product(slices_i, slices_i)] for kern, slices_i in zip(self.kerns, slices)] else: slices2 = index_to_slices(X2[:,self.index_dim]) target = np.zeros((X.shape[0], X2.shape[0])) - [[[np.copyto(target[s, s2], self.kern.K(X[s,:],X2[s2,:])) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)] + [[target.__setitem__((s,s2), kern.K(X[s,:],X2[s2,:])) for s,s2 in itertools.product(slices_i, slices_j)] for kern, slices_i,slices_j in zip(self.kerns, slices,slices2)] return target def Kdiag(self,X): slices = index_to_slices(X[:,self.index_dim]) target = np.zeros(X.shape[0]) - [[np.copyto(target[s], self.kern.Kdiag(X[s])) for s in slices_i] for slices_i in slices] + [[np.copyto(target[s], kern.Kdiag(X[s])) for s in slices_i] for kern, slices_i in zip(self.kerns, slices)] return target def update_gradients_full(self,dL_dK,X,X2=None): - target = np.zeros(self.kern.size) - def collate_grads(dL, X, X2): - self.kern.update_gradients_full(dL,X,X2) - target[:] += self.kern.gradient - slices = index_to_slices(X[:,self.index_dim]) + if self.single_kern: target = np.zeros(self.kern.size) + else: target = [np.zeros(kern.size) for kern, _ in zip(self.kerns, slices)] + def collate_grads(kern, i, dL, X, X2): + kern.update_gradients_full(dL,X,X2) + if self.single_kern: target[:] += kern.gradient + else: target[i][:] += kern.gradient if X2 is None: - [[collate_grads(dL_dK[s,ss], X[s], X[ss]) for s,ss in itertools.product(slices_i, slices_i)] for slices_i in slices] + [[collate_grads(kern, i, dL_dK[s,ss], X[s], X[ss]) for s,ss in itertools.product(slices_i, slices_i)] for i,(kern,slices_i) in enumerate(zip(self.kerns,slices))] else: slices2 = index_to_slices(X2[:,self.index_dim]) - [[[collate_grads(dL_dK[s,s2],X[s],X2[s2]) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)] - self.kern.gradient = target + [[[collate_grads(kern, i, dL_dK[s,s2],X[s],X2[s2]) for s in slices_i] for s2 in slices_j] for i,(kern,slices_i,slices_j) in enumerate(zip(self.kerns,slices,slices2))] + if self.single_kern: kern.gradient = target + else:[kern.gradient.__setitem__(Ellipsis, target[i]) for i, [kern, _] in enumerate(zip(self.kerns, slices))] def gradients_X(self,dL_dK, X, X2=None): target = np.zeros(X.shape) - slices = index_to_slices(X[:,self.index_dim]) if X2 is None: - [[np.copyto(target[s,self.kern.active_dims], self.kern.gradients_X(dL_dK[s,ss],X[s],X[ss])) for s, ss in 
itertools.product(slices_i, slices_i)] for slices_i in slices] + # TODO: make use of index_to_slices + values = np.unique(X[:,self.index_dim]) + slices = [X[:,self.index_dim]==i for i in values] + [target.__setitem__(s, kern.gradients_X(dL_dK[s,s],X[s],None)) + for kern, s in zip(self.kerns, slices)] + #slices = index_to_slices(X[:,self.index_dim]) + #[[np.add(target[s], kern.gradients_X(dL_dK[s,s], X[s]), out=target[s]) + # for s in slices_i] for kern, slices_i in zip(self.kerns, slices)] + #import ipdb;ipdb.set_trace() + #[[(np.add(target[s ], kern.gradients_X(dL_dK[s ,ss],X[s ], X[ss]), out=target[s ]), + # np.add(target[ss], kern.gradients_X(dL_dK[ss,s ],X[ss], X[s ]), out=target[ss])) + # for s, ss in itertools.combinations(slices_i, 2)] for kern, slices_i in zip(self.kerns, slices)] else: - slices2 = index_to_slices(X2[:,self.index_dim]) - [[[np.copyto(target[s,self.kern.active_dims], self.kern.gradients_X(dL_dK[s,s2], X[s], X2[s2])) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)] + values = np.unique(X[:,self.index_dim]) + slices = [X[:,self.index_dim]==i for i in values] + slices2 = [X2[:,self.index_dim]==i for i in values] + [target.__setitem__(s, kern.gradients_X(dL_dK[s, :][:, s2],X[s],X2[s2])) + for kern, s, s2 in zip(self.kerns, slices, slices2)] + # TODO: make work with index_to_slices + #slices = index_to_slices(X[:,self.index_dim]) + #slices2 = index_to_slices(X2[:,self.index_dim]) + #[[target.__setitem__(s, target[s] + kern.gradients_X(dL_dK[s,s2], X[s], X2[s2])) for s, s2 in itertools.product(slices_i, slices_j)] for kern, slices_i,slices_j in zip(self.kerns, slices,slices2)] return target def gradients_X_diag(self, dL_dKdiag, X): slices = index_to_slices(X[:,self.index_dim]) target = np.zeros(X.shape) - [[np.copyto(target[s,self.kern.active_dims], self.kern.gradients_X_diag(dL_dKdiag[s],X[s])) for s in slices_i] for slices_i in slices] + [[target.__setitem__(s, kern.gradients_X_diag(dL_dKdiag[s],X[s])) for s in slices_i] for kern, slices_i in zip(self.kerns, slices)] return target def update_gradients_diag(self, dL_dKdiag, X): - target = np.zeros(self.kern.size) - def collate_grads(dL, X): - self.kern.update_gradients_diag(dL,X) - target[:] += self.kern.gradient slices = index_to_slices(X[:,self.index_dim]) - [[collate_grads(dL_dKdiag[s], X[s,:]) for s in slices_i] for slices_i in slices] - self.kern.gradient = target + if self.single_kern: target = np.zeros(self.kern.size) + else: target = [np.zeros(kern.size) for kern, _ in zip(self.kerns, slices)] + def collate_grads(kern, i, dL, X): + kern.update_gradients_diag(dL,X) + if self.single_kern: target[:] += kern.gradient + else: target[i][:] += kern.gradient + [[collate_grads(kern, i, dL_dKdiag[s], X[s,:]) for s in slices_i] for i, (kern, slices_i) in enumerate(zip(self.kerns, slices))] + if self.single_kern: kern.gradient = target + else:[kern.gradient.__setitem__(Ellipsis, target[i]) for i, [kern, _] in enumerate(zip(self.kerns, slices))] -class Hierarchical(Kern): +class Hierarchical(CombinationKernel): """ A kernel which can reopresent a simple hierarchical model. @@ -115,7 +145,7 @@ class Hierarchical(Kern): The index of the functions is given by additional columns in the input X. 
""" - def __init__(self, kerns, name='hierarchy'): + def __init__(self, kern, name='hierarchy'): assert all([k.input_dim==kerns[0].input_dim for k in kerns]) super(Hierarchical, self).__init__(kerns[0].input_dim + len(kerns) - 1, name) self.kerns = kerns diff --git a/GPy/testing/kernel_tests.py b/GPy/testing/kernel_tests.py index b057f8ef..b45d9919 100644 --- a/GPy/testing/kernel_tests.py +++ b/GPy/testing/kernel_tests.py @@ -94,7 +94,7 @@ class Kern_check_dKdiag_dX(Kern_check_dK_dX): -def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verbose=False): +def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verbose=False, fixed_X_dims=None): """ This function runs on kernels to check the correctness of their implementation. It checks that the covariance function is positive definite @@ -109,11 +109,11 @@ def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verb """ pass_checks = True - if X==None: + if X is None: X = np.random.randn(10, kern.input_dim) if output_ind is not None: X[:, output_ind] = np.random.randint(kern.output_dim, X.shape[0]) - if X2==None: + if X2 is None: X2 = np.random.randn(20, kern.input_dim) if output_ind is not None: X2[:, output_ind] = np.random.randint(kern.output_dim, X2.shape[0]) @@ -164,7 +164,10 @@ def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verb if verbose: print("Checking gradients of K(X, X) wrt X.") try: - result = Kern_check_dK_dX(kern, X=X, X2=None).checkgrad(verbose=verbose) + testmodel = Kern_check_dK_dX(kern, X=X, X2=None) + if fixed_X_dims is not None: + testmodel.X[:,fixed_X_dims].fix() + result = testmodel.checkgrad(verbose=verbose) except NotImplementedError: result=True if verbose: @@ -173,14 +176,17 @@ def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verb print("Check passed.") if not result: print("Gradient of K(X, X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:") - Kern_check_dK_dX(kern, X=X, X2=None).checkgrad(verbose=True) + testmodel.checkgrad(verbose=True) pass_checks = False return False if verbose: print("Checking gradients of K(X, X2) wrt X.") try: - result = Kern_check_dK_dX(kern, X=X, X2=X2).checkgrad(verbose=verbose) + testmodel = Kern_check_dK_dX(kern, X=X, X2=X2) + if fixed_X_dims is not None: + testmodel.X[:,fixed_X_dims].fix() + result = testmodel.checkgrad(verbose=verbose) except NotImplementedError: result=True if verbose: @@ -188,8 +194,8 @@ def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verb if result and verbose: print("Check passed.") if not result: - print("Gradient of K(X, X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:") - Kern_check_dK_dX(kern, X=X, X2=X2).checkgrad(verbose=True) + print("Gradient of K(X, X2) wrt X failed for " + kern.name + " covariance function. 
Gradient values as follows:") + testmodel.checkgrad(verbose=True) pass_checks = False return False @@ -300,24 +306,50 @@ class KernelTestsMiscellaneous(unittest.TestCase): class KernelTestsNonContinuous(unittest.TestCase): def setUp(self): - N = 100 - N1 = 110 - self.D = 2 - D = self.D - self.X = np.random.randn(N,D) - self.X2 = np.random.randn(N1,D) - self.X_block = np.zeros((N+N1, D+D+1)) - self.X_block[0:N, 0:D] = self.X - self.X_block[N:N+N1, D:D+D] = self.X2 - self.X_block[0:N, -1] = 1 - self.X_block[N:N+1, -1] = 2 - self.X_block = self.X_block[self.X_block.argsort(0)[:, -1], :] - + N0 = 3 + N1 = 9 + N2 = 4 + N = N0+N1+N2 + self.D = 3 + self.X = np.random.randn(N, self.D+1) + indices = np.random.random_integers(0, 2, size=N) + self.X[indices==0, -1] = 0 + self.X[indices==1, -1] = 1 + self.X[indices==2, -1] = 2 + #self.X = self.X[self.X[:, -1].argsort(), :] + self.X2 = np.random.randn((N0+N1)*2, self.D+1) + self.X2[:(N0*2), -1] = 0 + self.X2[(N0*2):, -1] = 1 + def test_IndependentOutputs(self): k = GPy.kern.RBF(self.D) - kern = GPy.kern.IndependentOutputs(k, -1) - self.assertTrue(check_kernel_gradient_functions(kern, X=self.X_block, X2=self.X_block, verbose=verbose)) + kern = GPy.kern.IndependentOutputs(k, -1, 'ind_single') + self.assertTrue(check_kernel_gradient_functions(kern, X=self.X, X2=self.X2, verbose=verbose, fixed_X_dims=-1)) + k = [GPy.kern.RBF(1, active_dims=[1], name='rbf1'), GPy.kern.RBF(self.D, name='rbf012'), GPy.kern.RBF(2, active_dims=[0,2], name='rbf02')] + kern = GPy.kern.IndependentOutputs(k, -1, name='ind_split') + self.assertTrue(check_kernel_gradient_functions(kern, X=self.X, X2=self.X2, verbose=verbose, fixed_X_dims=-1)) if __name__ == "__main__": print "Running unit tests, please be (very) patient..." - unittest.main() + #unittest.main() + np.random.seed(0) + N0 = 3 + N1 = 9 + N2 = 4 + N = N0+N1+N2 + D = 3 + X = np.random.randn(N, D+1) + indices = np.random.random_integers(0, 2, size=N) + X[indices==0, -1] = 0 + X[indices==1, -1] = 1 + X[indices==2, -1] = 2 + #X = X[X[:, -1].argsort(), :] + X2 = np.random.randn((N0+N1)*2, D+1) + X2[:(N0*2), -1] = 0 + X2[(N0*2):, -1] = 1 + k = [GPy.kern.RBF(1, active_dims=[1], name='rbf1'), GPy.kern.RBF(D, name='rbf012'), GPy.kern.RBF(2, active_dims=[0,2], name='rbf02')] + kern = GPy.kern.IndependentOutputs(k, -1, name='ind_split') + assert(check_kernel_gradient_functions(kern, X=X, X2=X2, verbose=verbose, fixed_X_dims=-1)) + k = GPy.kern.RBF(D) + kern = GPy.kern.IndependentOutputs(k, -1, 'ind_single') + assert(check_kernel_gradient_functions(kern, X=X, X2=X2, verbose=verbose, fixed_X_dims=-1)) From 2ce3a93b3f38be176815d74ea47c2cb9bf128b33 Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Mon, 17 Mar 2014 16:55:21 +0000 Subject: [PATCH 3/5] pickling working for array-likes, but observers not yet connected back --- GPy/core/parameterization/array_core.py | 12 ++++++++-- GPy/core/parameterization/param.py | 4 +++- GPy/core/parameterization/parameter_core.py | 7 +++--- GPy/testing/observable_tests.py | 26 ++++++++++----------- GPy/testing/parameterized_tests.py | 7 +++++- 5 files changed, 35 insertions(+), 21 deletions(-) diff --git a/GPy/core/parameterization/array_core.py b/GPy/core/parameterization/array_core.py index e3a5b137..6920e894 100644 --- a/GPy/core/parameterization/array_core.py +++ b/GPy/core/parameterization/array_core.py @@ -1,7 +1,7 @@ # Copyright (c) 2012, GPy authors (see AUTHORS.txt). 
# Licensed under the BSD 3-clause license (see LICENSE.txt)

-__updated__ = '2013-12-16'
+__updated__ = '2014-03-17'

 import numpy as np
 from parameter_core import Observable

@@ -18,7 +18,7 @@ class ObservableArray(np.ndarray, Observable):
         if not isinstance(input_array, ObservableArray):
             obj = np.atleast_1d(np.require(input_array, dtype=np.float64, requirements=['W', 'C'])).view(cls)
         else: obj = input_array
-        cls.__name__ = "ObservableArray\n     "
+        cls.__name__ = "ObsAr" # because of fixed printing of `array` in np printing
         super(ObservableArray, obj).__init__(*a, **kw)
         return obj

@@ -30,6 +30,14 @@ def __array_wrap__(self, out_arr, context=None):
         return out_arr.view(np.ndarray)

+    def __reduce__(self):
+        func, args, state = np.ndarray.__reduce__(self)
+        return func, args, (state, Observable._getstate(self))
+
+    def __setstate__(self, state):
+        np.ndarray.__setstate__(self, state[0])
+        Observable._setstate(self, state[1])
+
     def _s_not_empty(self, s):
         # this checks whether there is something picked by this slice.
         return True
diff --git a/GPy/core/parameterization/param.py b/GPy/core/parameterization/param.py
index 2ede8436..ed394806 100644
--- a/GPy/core/parameterization/param.py
+++ b/GPy/core/parameterization/param.py
@@ -269,6 +269,8 @@ class Param(OptimizationHandlable, ObservableArray):
     @property
     def _ties_str(self):
         return ['']
+    def _ties_for(self, ravi):
+        return [['N/A']]*ravi.size
     def __repr__(self, *args, **kwargs):
         name = "\033[1m{x:s}\033[0;0m:\n".format( x=self.hierarchy_name())
@@ -312,7 +314,7 @@
         ravi = self._raveled_index(filter_)
         if constr_matrix is None: constr_matrix = self.constraints.properties_for(ravi)
         if prirs is None: prirs = self.priors.properties_for(ravi)
-        if ties is None: ties = [['N/A']]*self.size
+        if ties is None: ties = self._ties_for(ravi)
         ties = [' '.join(map(lambda x: x, t)) for t in ties]
         if lc is None: lc = self._max_len_names(constr_matrix, __constraints_name__)
         if lx is None: lx = self._max_len_values()
diff --git a/GPy/core/parameterization/parameter_core.py b/GPy/core/parameterization/parameter_core.py
index f58143bd..0aab890c 100644
--- a/GPy/core/parameterization/parameter_core.py
+++ b/GPy/core/parameterization/parameter_core.py
@@ -16,7 +16,7 @@ Observable Pattern for parameterization
 from transformations import Transformation, Logexp, NegativeLogexp, Logistic, __fixed__, FIXED, UNFIXED
 import numpy as np

-__updated__ = '2014-03-14'
+__updated__ = '2014-03-17'

 class HierarchyError(Exception):
     """
@@ -56,7 +56,7 @@ class InterfacePickleFunctions(object):
     """
     raise NotImplementedError, "To be able to use pickling you need to implement this method"

-class Pickleable(object):
+class Pickleable(InterfacePickleFunctions):
     """
     Make an object pickleable (See python doc 'pickling').
@@ -95,7 +95,7 @@ def _has_get_set_state(self):
     return '_getstate' in vars(self.__class__) and '_setstate' in vars(self.__class__)

-class Observable(InterfacePickleFunctions):
+class Observable(Pickleable):
     """
     Observable pattern for parameterization.
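The two hunks above rely on a general numpy recipe: an ndarray subclass loses its extra attributes on pickling unless __reduce__ appends the subclass state to the ndarray state and __setstate__ restores both. A minimal, self-contained sketch of that pattern (class and attribute names here are illustrative, not GPy API):

    import pickle
    import numpy as np

    class StatefulArray(np.ndarray):
        # ndarray subclass whose extra attribute survives pickling
        def __new__(cls, input_array):
            obj = np.asarray(input_array, dtype=np.float64).view(cls)
            obj.observers = []
            return obj

        def __array_finalize__(self, obj):
            # called for views and fresh instances alike; default safely
            self.observers = getattr(obj, 'observers', [])

        def __reduce__(self):
            # ndarray.__reduce__ returns (constructor, args, ndarray_state);
            # append our own state so both travel through pickle together
            func, args, state = np.ndarray.__reduce__(self)
            return func, args, (state, self.observers)

        def __setstate__(self, state):
            np.ndarray.__setstate__(self, state[0])
            self.observers = state[1]

    a = StatefulArray([1., 2., 3.])
    a.observers.append('callback placeholder')
    b = pickle.loads(pickle.dumps(a))
    assert list(b) == [1., 2., 3.] and b.observers == ['callback placeholder']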
@@ -155,6 +155,7 @@ class Observable(InterfacePickleFunctions): def _getstate(self): return [self._observer_callables_] + def _setstate(self, state): self._observer_callables_ = state.pop() diff --git a/GPy/testing/observable_tests.py b/GPy/testing/observable_tests.py index ebda1630..f8be4a48 100644 --- a/GPy/testing/observable_tests.py +++ b/GPy/testing/observable_tests.py @@ -8,7 +8,7 @@ from GPy.core.parameterization.parameterized import Parameterized from GPy.core.parameterization.param import Param import numpy -# One trigger in init +# One trigger in init _trigger_start = -1 class ParamTestParent(Parameterized): @@ -21,11 +21,9 @@ class ParameterizedTest(Parameterized): params_changed_count = _trigger_start def parameters_changed(self): self.params_changed_count += 1 - def _set_params(self, params, trigger_parent=True): - Parameterized._set_params(self, params, trigger_parent=trigger_parent) class Test(unittest.TestCase): - + def setUp(self): self.parent = ParamTestParent('test parent') self.par = ParameterizedTest('test model') @@ -41,12 +39,12 @@ class Test(unittest.TestCase): self.parent.add_parameter(self.par) self.parent.add_parameter(self.par2) - + self._observer_triggered = None self._trigger_count = 0 self._first = None self._second = None - + def _trigger(self, which): self._observer_triggered = float(which) self._trigger_count += 1 @@ -54,18 +52,18 @@ class Test(unittest.TestCase): self._second = self._trigger else: self._first = self._trigger - + def _trigger_priority(self, which): if self._first is not None: self._second = self._trigger_priority else: self._first = self._trigger_priority - + def test_observable(self): self.par.add_observer(self, self._trigger, -1) self.assertEqual(self.par.params_changed_count, 0, 'no params changed yet') self.assertEqual(self.par.params_changed_count, self.parent.parent_changed_count, 'parent should be triggered as often as param') - + self.p[0,1] = 3 # trigger observers self.assertEqual(self._observer_triggered, 3, 'observer should have triggered') self.assertEqual(self._trigger_count, 1, 'observer should have triggered once') @@ -78,14 +76,14 @@ class Test(unittest.TestCase): self.assertEqual(self._trigger_count, 1, 'observer should have triggered once') self.assertEqual(self.par.params_changed_count, 2, 'params changed second') self.assertEqual(self.par.params_changed_count, self.parent.parent_changed_count, 'parent should be triggered as often as param') - + self.par.add_observer(self, self._trigger, -1) self.p[2,1] = 4 self.assertEqual(self._observer_triggered, 4, 'observer should have triggered') self.assertEqual(self._trigger_count, 2, 'observer should have triggered once') self.assertEqual(self.par.params_changed_count, 3, 'params changed second') self.assertEqual(self.par.params_changed_count, self.parent.parent_changed_count, 'parent should be triggered as often as param') - + self.par.remove_observer(self, self._trigger) self.p[0,1] = 3 self.assertEqual(self._observer_triggered, 4, 'observer should not have triggered') @@ -99,7 +97,7 @@ class Test(unittest.TestCase): self.par._trigger_params_changed() self.assertEqual(self.par.params_changed_count, 1, 'now params changed') self.assertEqual(self.parent.parent_changed_count, self.par.params_changed_count) - + self.par._param_array_[:] = 2 self.par._trigger_params_changed() self.assertEqual(self.par.params_changed_count, 2, 'now params changed') @@ -125,13 +123,13 @@ class Test(unittest.TestCase): self.par.remove_observer(self) self._first = self._second = None - + 
self.par.add_observer(self, self._trigger, 1) self.par.add_observer(self, self._trigger_priority, 0) self.par.notify_observers(0) self.assertEqual(self._first, self._trigger, 'priority should be second') self.assertEqual(self._second, self._trigger_priority, 'priority should be second') - + if __name__ == "__main__": #import sys;sys.argv = ['', 'Test.testName'] diff --git a/GPy/testing/parameterized_tests.py b/GPy/testing/parameterized_tests.py index 5b718cbd..754e95db 100644 --- a/GPy/testing/parameterized_tests.py +++ b/GPy/testing/parameterized_tests.py @@ -108,7 +108,7 @@ class ParameterizedTest(unittest.TestCase): self.assertEqual(self.param.constraints._offset, 3) def test_fixing_randomize(self): - self.white.fix(warning=False) + self.white.fix(warning=True) val = float(self.test1.white.variance) self.test1.randomize() self.assertEqual(val, self.white.variance) @@ -119,6 +119,11 @@ class ParameterizedTest(unittest.TestCase): self.testmodel.randomize() self.assertEqual(val, self.testmodel.kern.lengthscale) + def test_printing(self): + print self.test1 + print self.param + print self.test1[''] + if __name__ == "__main__": #import sys;sys.argv = ['', 'Test.test_add_parameter'] unittest.main() \ No newline at end of file From 64f44cf1796eb8ba4c0e794ad4d2183e663ead4e Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Mon, 17 Mar 2014 17:10:06 +0000 Subject: [PATCH 4/5] ObservableArray -> ObsAr, because of pickling and ndarray printing --- GPy/core/gp.py | 10 +++++----- GPy/core/parameterization/__init__.py | 2 +- GPy/core/parameterization/array_core.py | 14 +++++++------- GPy/core/parameterization/param.py | 4 ++-- GPy/likelihoods/gaussian.py | 3 +++ GPy/models/gp_regression.py | 4 ++-- GPy/testing/parameterized_tests.py | 8 ++++---- 7 files changed, 24 insertions(+), 21 deletions(-) diff --git a/GPy/core/gp.py b/GPy/core/gp.py index 70b7d695..38019fa7 100644 --- a/GPy/core/gp.py +++ b/GPy/core/gp.py @@ -7,7 +7,7 @@ import warnings from .. import kern from ..util.linalg import dtrtrs from model import Model -from parameterization import ObservableArray +from parameterization import ObsAr from .. import likelihoods from ..likelihoods.gaussian import Gaussian from ..inference.latent_function_inference import exact_gaussian_inference, expectation_propagation @@ -31,19 +31,19 @@ class GP(Model): super(GP, self).__init__(name) assert X.ndim == 2 - if isinstance(X, (ObservableArray, VariationalPosterior)): + if isinstance(X, (ObsAr, VariationalPosterior)): self.X = X - else: self.X = ObservableArray(X) + else: self.X = ObsAr(X) self.num_data, self.input_dim = self.X.shape assert Y.ndim == 2 - self.Y = ObservableArray(Y) + self.Y = ObsAr(Y) assert Y.shape[0] == self.num_data _, self.output_dim = self.Y.shape if Y_metadata is None: - Y_metadata = {} + self.Y_metadata = {} else: self.Y_metadata = Y_metadata diff --git a/GPy/core/parameterization/__init__.py b/GPy/core/parameterization/__init__.py index ccbac39d..8e9aa094 100644 --- a/GPy/core/parameterization/__init__.py +++ b/GPy/core/parameterization/__init__.py @@ -1,5 +1,5 @@ # Copyright (c) 2012, GPy authors (see AUTHORS.txt). 
# Licensed under the BSD 3-clause license (see LICENSE.txt) -from param import Param, ObservableArray +from param import Param, ObsAr from parameterized import Parameterized diff --git a/GPy/core/parameterization/array_core.py b/GPy/core/parameterization/array_core.py index 6920e894..a120f004 100644 --- a/GPy/core/parameterization/array_core.py +++ b/GPy/core/parameterization/array_core.py @@ -6,20 +6,20 @@ __updated__ = '2014-03-17' import numpy as np from parameter_core import Observable -class ObservableArray(np.ndarray, Observable): +class ObsAr(np.ndarray, Observable): """ An ndarray which reports changes to its observers. The observers can add themselves with a callable, which will be called every time this array changes. The callable takes exactly one argument, which is this array itself. """ - __array_priority__ = -1 # Never give back ObservableArray + __array_priority__ = -1 # Never give back ObsAr def __new__(cls, input_array, *a, **kw): - if not isinstance(input_array, ObservableArray): + if not isinstance(input_array, ObsAr): obj = np.atleast_1d(np.require(input_array, dtype=np.float64, requirements=['W', 'C'])).view(cls) else: obj = input_array - cls.__name__ = "ObsAr" # because of fixed printing of `array` in np printing - super(ObservableArray, obj).__init__(*a, **kw) + #cls.__name__ = "ObsAr" # because of fixed printing of `array` in np printing + super(ObsAr, obj).__init__(*a, **kw) return obj def __array_finalize__(self, obj): @@ -54,7 +54,7 @@ class ObservableArray(np.ndarray, Observable): def __setitem__(self, s, val): if self._s_not_empty(s): - super(ObservableArray, self).__setitem__(s, val) + super(ObsAr, self).__setitem__(s, val) self.notify_observers(self[s]) def __getslice__(self, start, stop): @@ -64,7 +64,7 @@ class ObservableArray(np.ndarray, Observable): return self.__setitem__(slice(start, stop), val) def __copy__(self, *args): - return ObservableArray(self.view(np.ndarray).copy()) + return ObsAr(self.view(np.ndarray).copy()) def copy(self, *args): return self.__copy__(*args) diff --git a/GPy/core/parameterization/param.py b/GPy/core/parameterization/param.py index ed394806..324593f9 100644 --- a/GPy/core/parameterization/param.py +++ b/GPy/core/parameterization/param.py @@ -4,7 +4,7 @@ import itertools import numpy from parameter_core import OptimizationHandlable, adjust_name_for_printing -from array_core import ObservableArray +from array_core import ObsAr ###### printing __constraints_name__ = "Constraint" @@ -15,7 +15,7 @@ __precision__ = numpy.get_printoptions()['precision'] # numpy printing precision __print_threshold__ = 5 ###### -class Param(OptimizationHandlable, ObservableArray): +class Param(OptimizationHandlable, ObsAr): """ Parameter object for GPy models. 
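The ObservableArray -> ObsAr rename above is mostly cosmetic, but it is visible wherever the class name is printed or imported. A short usage sketch of what patch 4 promises (assuming a GPy checkout at this revision; the printed name is an expectation based on the hunks above, not verified output):

    import numpy as np
    import GPy

    X = np.random.randn(20, 1)
    Y = np.sin(X) + 0.05 * np.random.randn(20, 1)
    m = GPy.models.GPRegression(X, Y)
    # GP.__init__ wraps the data in observable arrays, so the model
    # is notified of in-place changes:
    print type(m.X).__name__  # expected: 'ObsAr'
    m.X[0, 0] = 0.            # __setitem__ notifies registered observers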
diff --git a/GPy/likelihoods/gaussian.py b/GPy/likelihoods/gaussian.py index 101aac4b..4a6c5735 100644 --- a/GPy/likelihoods/gaussian.py +++ b/GPy/likelihoods/gaussian.py @@ -50,6 +50,9 @@ class Gaussian(Likelihood): if isinstance(gp_link, link_functions.Identity): self.log_concave = True + def gaussian_variance(self): + return self.variance + def covariance_matrix(self, Y, Y_metadata=None): return np.eye(Y.shape[0]) * self.variance diff --git a/GPy/models/gp_regression.py b/GPy/models/gp_regression.py index 5e83db09..86e64a54 100644 --- a/GPy/models/gp_regression.py +++ b/GPy/models/gp_regression.py @@ -20,14 +20,14 @@ class GPRegression(GP): """ - def __init__(self, X, Y, kernel=None): + def __init__(self, X, Y, kernel=None, Y_metadata=None): if kernel is None: kernel = kern.RBF(X.shape[1]) likelihood = likelihoods.Gaussian() - super(GPRegression, self).__init__(X, Y, kernel, likelihood, name='GP regression') + super(GPRegression, self).__init__(X, Y, kernel, likelihood, name='GP regression', Y_metadata=Y_metadata) def _getstate(self): return GP._getstate(self) diff --git a/GPy/testing/parameterized_tests.py b/GPy/testing/parameterized_tests.py index 754e95db..81c2dfdd 100644 --- a/GPy/testing/parameterized_tests.py +++ b/GPy/testing/parameterized_tests.py @@ -7,16 +7,16 @@ import unittest import GPy import numpy as np from GPy.core.parameterization.parameter_core import HierarchyError -from GPy.core.parameterization.array_core import ObservableArray +from GPy.core.parameterization.array_core import ObsAr class ArrayCoreTest(unittest.TestCase): def setUp(self): self.X = np.random.normal(1,1, size=(100,10)) - self.obsX = ObservableArray(self.X) + self.obsX = ObsAr(self.X) def test_init(self): - X = ObservableArray(self.X) - X2 = ObservableArray(X) + X = ObsAr(self.X) + X2 = ObsAr(X) self.assertIs(X, X2, "no new Observable array, when Observable is given") def test_slice(self): From 5acb66cf78f082dab3c96b58c426d1f01930204c Mon Sep 17 00:00:00 2001 From: Zhenwen Dai Date: Tue, 18 Mar 2014 12:35:28 +0000 Subject: [PATCH 5/5] bug fix w.r.t. 
var_dtc.py
---
 GPy/core/parameterization/variational.py | 21 +++++++++++++++++++
 .../latent_function_inference/var_dtc.py  |  8 +++----
 2 files changed, 25 insertions(+), 4 deletions(-)

diff --git a/GPy/core/parameterization/variational.py b/GPy/core/parameterization/variational.py
index 01706922..5b3c4bca 100644
--- a/GPy/core/parameterization/variational.py
+++ b/GPy/core/parameterization/variational.py
@@ -126,6 +126,27 @@ class SpikeAndSlabPosterior(VariationalPosterior):
         super(SpikeAndSlabPosterior, self).__init__(means, variances, name)
         self.gamma = Param("binary_prob",binary_prob, Logistic(1e-10,1.-1e-10))
         self.add_parameter(self.gamma)
+
+    def __getitem__(self, s):
+        if isinstance(s, (int, slice, tuple, list, np.ndarray)):
+            import copy
+            n = self.__new__(self.__class__, self.name)
+            dc = self.__dict__.copy()
+            dc['mean'] = self.mean[s]
+            dc['variance'] = self.variance[s]
+            dc['binary_prob'] = self.binary_prob[s]
+            dc['_parameters_'] = copy.copy(self._parameters_)
+            n.__dict__.update(dc)
+            n._parameters_[dc['mean']._parent_index_] = dc['mean']
+            n._parameters_[dc['variance']._parent_index_] = dc['variance']
+            n._parameters_[dc['binary_prob']._parent_index_] = dc['binary_prob']
+            n.ndim = n.mean.ndim
+            n.shape = n.mean.shape
+            n.num_data = n.mean.shape[0]
+            n.input_dim = n.mean.shape[1] if n.ndim != 1 else 1
+            return n
+        else:
+            return super(SpikeAndSlabPosterior, self).__getitem__(s)

     def plot(self, *args):
         """
diff --git a/GPy/inference/latent_function_inference/var_dtc.py b/GPy/inference/latent_function_inference/var_dtc.py
index 82f6c2b9..e2aa95f5 100644
--- a/GPy/inference/latent_function_inference/var_dtc.py
+++ b/GPy/inference/latent_function_inference/var_dtc.py
@@ -134,7 +134,7 @@ class VarDTC(object):

         # log marginal likelihood
         log_marginal = _compute_log_marginal_likelihood(likelihood, num_data, output_dim, beta, het_noise,
-                                                        psi0, A, LB, trYYT, data_fit, Y)
+                                                        psi0, A, LB, trYYT, data_fit, VVT_factor)

         #put the gradients in the right places
         dL_dR = _compute_dL_dR(likelihood,
@@ -208,7 +208,7 @@ class VarDTCMissingData(object):
             self._subarray_indices = [[slice(None),slice(None)]]
         return [Y], [(Y**2).sum()]

-    def inference(self, kern, X, Z, likelihood, Y):
+    def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None):
         if isinstance(X, VariationalPosterior):
             uncertain_inputs = True
             psi0_all = kern.psi0(Z, X)
@@ -305,7 +305,7 @@ class VarDTCMissingData(object):
             # log marginal likelihood
             log_marginal += _compute_log_marginal_likelihood(likelihood, num_data, output_dim, beta, het_noise,
-                                                        psi0, A, LB, trYYT, data_fit)
+                                                        psi0, A, LB, trYYT, data_fit,VVT_factor)

             #put the gradients in the right places
             dL_dR += _compute_dL_dR(likelihood,
@@ -420,7 +420,7 @@ def _compute_dL_dR(likelihood, het_noise, uncertain_inputs, LB, _LBi_Lmi_psi1Vf,
 def _compute_log_marginal_likelihood(likelihood, num_data, output_dim, beta, het_noise, psi0, A, LB, trYYT, data_fit,Y):
     #compute log marginal likelihood
     if het_noise:
-        lik_1 = -0.5 * num_data * output_dim * np.log(2. * np.pi) + 0.5 * np.sum(np.log(beta)) - 0.5 * np.sum(beta * Y**2)
+        lik_1 = -0.5 * num_data * output_dim * np.log(2. * np.pi) + 0.5 * np.sum(np.log(beta)) - 0.5 * np.sum(beta * np.square(Y).sum(axis=-1))
         lik_2 = -0.5 * output_dim * (np.sum(beta.flatten() * psi0) - np.trace(A))
     else:
        lik_1 = -0.5 * num_data * output_dim * (np.log(2. * np.pi) - np.log(beta)) - 0.5 * beta * trYYT
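The heteroscedastic term rewritten in the last hunk is easy to get wrong dimensionally: beta carries one precision per data point, so the squared values must be collapsed over output dimensions before the beta weighting. A toy numeric check of the new expression (standalone shapes, not GPy's actual arrays):

    import numpy as np

    num_data, output_dim = 5, 3
    Y = np.random.randn(num_data, output_dim)   # stands in for VVT_factor
    beta = np.exp(np.random.randn(num_data))    # per-point precisions

    term = -0.5 * np.sum(beta * np.square(Y).sum(axis=-1))
    # equivalent to summing beta-weighted squared row norms:
    check = -0.5 * sum(beta[n] * np.dot(Y[n], Y[n]) for n in range(num_data))
    assert np.isclose(term, check)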