# Recovered from patch 415e3256c0767a938ecab3e855a3d2d2c85d2adf
# ("subarray indexing", Max Zwiessele, Mon, 16 Dec 2013), which created
# GPy/util/subarray_and_sorting.py.
'''
.. module:: GPy.util.subarray_and_sorting

.. moduleauthor:: Max Zwiessele

'''
__updated__ = '2013-12-02'

from collections import defaultdict

import numpy as np


def common_subarrays(X, axis=0):
    """
    Group identical rows (or columns) of a 2 dimensional array.

    Every 1-d slice of X taken along the chosen direction is converted to a
    tuple and used as a dictionary key; the value stored under that key is
    the list of positions (in increasing order) at which that slice occurs.

    :param X: 2d array to check for common subarrays in
    :type X: :class:`numpy.ndarray` (anything accepted by ``numpy.asarray``)
    :param int axis: axis to apply subarray detection over.
        When axis is 0, compare rows; otherwise (axis is 1) compare columns.
    :returns: mapping ``tuple(subarray) -> [indices of that subarray]``
    :rtype: :class:`collections.defaultdict` of lists
    :raises AssertionError: if X is not 2 dimensional or axis is not 0 or 1

    .. note::
        The result is a ``defaultdict``: looking up a key that is not a
        subarray of X inserts (and returns) an empty list.

    Examples:
    =========

    In a 2d array:

    >>> import numpy as np
    >>> X = np.zeros((3,6), dtype=bool)
    >>> X[[1,1,1],[0,4,5]] = 1; X[1:,[2,3]] = 1
    >>> X.astype(int)
    array([[0, 0, 0, 0, 0, 0],
           [1, 0, 1, 1, 1, 1],
           [0, 0, 1, 1, 0, 0]])
    >>> d = common_subarrays(X,axis=1)
    >>> len(d)
    3
    >>> X[:, d[tuple(X[:,0])]].astype(int)
    array([[0, 0, 0],
           [1, 1, 1],
           [0, 0, 0]])
    >>> d[tuple(X[:,4])] == d[tuple(X[:,0])] == [0, 4, 5]
    True
    >>> d[tuple(X[:,1])]
    [1]
    """
    X = np.asarray(X)
    assert X.ndim == 2 and axis in (0, 1), "Only implemented for 2D arrays"
    subarrays = defaultdict(list)
    # Iterating a 2d array yields its rows; transposing first yields columns.
    # A plain loop replaces the former np.apply_along_axis call, which was
    # used only for its side effect and breaks on current NumPy because the
    # callback returned lists of varying length.
    slices = X if axis == 0 else X.T
    for index, subarray in enumerate(slices):
        subarrays[tuple(subarray)].append(index)
    return subarrays


if __name__ == '__main__':
    import doctest
    doctest.testmod()