2014-01-24 09:50:49 +00:00
|
|
|
'''
|
|
|
|
|
.. module:: GPy.util.subarray_and_sorting
|
|
|
|
|
|
|
|
|
|
.. moduleauthor:: Max Zwiessele <ibinbei@gmail.com>
|
|
|
|
|
|
|
|
|
|
'''
|
2014-05-21 16:32:06 +01:00
|
|
|
__updated__ = '2014-05-21'
|
2014-01-24 09:50:49 +00:00
|
|
|
|
2014-05-21 16:32:06 +01:00
|
|
|
import numpy as np, logging
|
2014-01-24 09:50:49 +00:00
|
|
|
|
|
|
|
|
def common_subarrays(X, axis=0):
    """
    Find common subarrays of 2 dimensional X, where axis is the axis to apply the search over.

    Common subarrays are returned as a dictionary of <subarray, [index]> pairs, where
    the subarray is a tuple representing the subarray and the index is the index
    for the subarray in X, where index is the index to the remaining axis.

    :param :class:`np.ndarray` X: 2d array to check for common subarrays in
    :param int axis: axis to apply subarray detection over.
        When axis is 0, rows are compared; otherwise columns are compared.
    :returns: a ``defaultdict(list)`` mapping each distinct row (``axis=0``) or
        column (``axis=1``), converted to a tuple, to the ascending list of
        positions at which it occurs.
    :raises AssertionError: if X is not 2 dimensional or axis is not 0 or 1.

    Examples:
    =========

    In a 2d array (shown as integers so that the output does not depend on
    how the installed numpy version prints boolean arrays):

    >>> import numpy as np
    >>> X = np.zeros((3,6), dtype=bool)
    >>> X[[1,1,1],[0,4,5]] = 1; X[1:,[2,3]] = 1
    >>> X.astype(int)
    array([[0, 0, 0, 0, 0, 0],
           [1, 0, 1, 1, 1, 1],
           [0, 0, 1, 1, 0, 0]])
    >>> d = common_subarrays(X,axis=1)
    >>> len(d)
    3
    >>> X[:, d[tuple(X[:,0])]].astype(int)
    array([[0, 0, 0],
           [1, 1, 1],
           [0, 0, 0]])
    >>> d[tuple(X[:,4])] == d[tuple(X[:,0])] == [0, 4, 5]
    True
    >>> d[tuple(X[:,1])]
    [1]
    """
    from collections import defaultdict

    # Kept as an assert so callers relying on AssertionError are unaffected.
    assert X.ndim == 2 and axis in (0, 1), "Only implemented for 2D arrays"

    subarrays = defaultdict(list)
    # Iterating a 2d array yields its rows; transposing first yields columns.
    slices = X if axis == 0 else X.T
    for index, subarray in enumerate(slices):
        # Tuples are hashable, so identical rows/columns share one key.
        subarrays[tuple(subarray)].append(index)
    return subarrays
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # When executed as a script, run the examples embedded in the docstrings.
    from doctest import testmod
    testmod()
|