Skip to content

ENH: per axis and per level indexing (orig GH6134) #6301

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Feb 13, 2014
Prev Previous commit
Next Next commit
CLN: move indexing loc changes to index.py
  • Loading branch information
jreback committed Feb 13, 2014
commit 30eb6dbbbe75cbbad36bd0d18bbb9563139bc009
173 changes: 173 additions & 0 deletions pandas/core/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -3249,6 +3249,179 @@ def _get_level_indexer(self, key, level=0):
j = labels.searchsorted(loc, side='right')
return slice(i, j)

def get_locs(self, tup):
    """Convert a tuple of slices/label lists/labels to a level-wise spec.

    Parameters
    ----------
    self : a sufficiently lexsorted, unique/non-dupe MultiIndex.
    tup : a tuple of slices, labels or lists of labels.
        slice(None) is acceptable, and the case of len(tup) < self.nlevels
        will have labels from trailing levels included.

    Returns
    -------
    a list containing self.nlevels elements of either:
        - a 2-tuple representing a (start, stop) slice
        or
        - a list of label positions.

    The positions are relative to the labels of the corresponding level,
    not to the entire unrolled index.

    Example (This is *not* a doctest):
    >>> mi = pd.MultiIndex.from_product([['A0', 'A1', 'A2'], ['B0', 'B1']])
    >>> for x in mi.get_values(): print(x)
    ('A0', 'B0')
    ('A0', 'B1')
    ('A1', 'B0')
    ('A1', 'B1')
    ('A2', 'B0')
    ('A2', 'B1')
    >>> mi.get_locs((slice('A0', 'A2'), ['B0', 'B1']))
    [(0, 2), [0, 1]]

    read as:
    - All labels in position [0, 2) in the first level
    - for each of those, all labels at positions 0 or 1.

    The same effective result can be achieved by specifying the None slice,
    or omitting it completely. Note the tuple (0, 2) has replaced the list
    [0, 1], but the outcome is the same.

    >>> mi.get_locs((slice('A0', 'A2'), slice(None)))
    [(0, 2), (0, 2)]

    >>> mi.get_locs((slice('A0', 'A2'),))
    [(0, 2), (0, 2)]

    """

    # self must be lexsorted to at least as many levels
    # as there are elements in `tup`, and free of duplicates
    assert self.is_lexsorted_for_tuple(tup)
    assert self.is_unique
    assert isinstance(self, MultiIndex)

    ranges = []

    for i, k in enumerate(tup):
        level = self.levels[i]

        if com.is_list_like(k):
            # a collection of labels to include from this level
            ranges.append([level.get_loc(x) for x in k])
        elif isinstance(k, slice):
            # a (possibly open-ended) label range; an omitted bound
            # means "from the beginning" / "to the end".
            # NOTE: the stop label is treated as non-inclusive, and
            # k.step is ignored.
            start = 0 if k.start is None else level.get_loc(k.start)
            stop = len(level) if k.stop is None else level.get_loc(k.stop)
            ranges.append((start, stop))
        else:
            # a single label; use (start, start + 1) so that the
            # downstream half-open range actually includes it
            # ((start, start) would select nothing)
            start = level.get_loc(k)
            ranges.append((start, start + 1))

    # omitting trailing dims means include all values from those levels
    # (iterate from len(tup) so that an empty tup is also handled)
    for j in range(len(tup), len(self.levels)):
        ranges.append((0, len(self.levels[j])))

    return ranges

def locs_to_indexer(self, specs):
    """Take a location specification to an indexer.

    Parameters
    ----------
    self : a sufficiently lexsorted, unique/non-dupe MultiIndex.
    specs : a list of 2-tuples/lists of label positions. Specifically,
        the output of ``get_locs``. len(specs) must match self.nlevels.

    Returns
    -------
    a flat ndarray of row positions relative to self, corresponding to
    specs. Suitable for usage with `iloc`.

    Raises
    ------
    TypeError
        If an element of specs is neither a tuple nor a list.

    Example (This is *not* a doctest):
    >>> mi = pd.MultiIndex.from_product([['A0', 'A1', 'A2'], ['B0', 'B1']])
    >>> for x in mi.get_values(): print(x)
    ('A0', 'B0')
    ('A0', 'B1')
    ('A1', 'B0')
    ('A1', 'B1')
    ('A2', 'B0')
    ('A2', 'B1')

    >>> locs = mi.get_locs((slice('A0', 'A2'), ['B0', 'B1']))
    >>> list(mi.locs_to_indexer(locs))
    [0, 1, 2, 3]

    Which are all the labels having 'A0' to 'A2' (non-inclusive) at
    level 0 and 'B0' or 'B1' at level 1.

    """
    assert self.is_lexsorted_for_tuple(specs)
    assert len(specs) == self.nlevels
    assert self.is_unique
    assert isinstance(self, MultiIndex)

    # step size/increment for iteration at each level: the number of
    # unrolled rows spanned by one label of that level
    giant_steps = np.cumprod(self.levshape[::-1])[::-1]
    giant_steps[:-1] = giant_steps[1:]
    giant_steps[-1] = 1

    def _positions(spec):
        # expand one level's spec into its label positions
        if isinstance(spec, tuple):
            # tuples are 2-tuples of (start, stop) label indices to include
            return compat.range(*spec)
        if isinstance(spec, list):
            # lists are discrete label indices to include
            return spec
        # previously an unrecognized spec fell through to an
        # UnboundLocalError; raise something meaningful instead
        raise TypeError('spec must be a 2-tuple or a list of label '
                        'positions, got %r' % type(spec))

    def _iter_vectorize(specs, i=0):
        # recursively build the cartesian row positions for levels i..n
        step_size = giant_steps[i]
        valrange = _positions(specs[i])

        if len(specs) - 1 == i:
            # innermost level: positions are the offsets themselves
            return np.array(valrange)

        # tile the inner-level offsets once per label at this level,
        # shifting each copy by that label's base offset
        tmpl = np.array([v for v in _iter_vectorize(specs, i + 1)])
        res = np.tile(tmpl, (len(valrange), 1))
        steps = (np.array(valrange) * step_size).reshape((len(valrange), 1))
        return (res + steps).flatten()

    return _iter_vectorize(specs)

def truncate(self, before=None, after=None):
"""
Slice index between two labels / tuples, return new MultiIndex
Expand Down
180 changes: 2 additions & 178 deletions pandas/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1181,11 +1181,8 @@ def _getitem_axis(self, key, axis=0):
return self._getitem_iterable(key, axis=axis)
elif isinstance(key, tuple) and isinstance(labels, MultiIndex) and \
any([isinstance(x,slice) for x in key]):
# handle per-axis tuple containting label criteria for
# each level (or a prefix of levels), may contain
# (None) slices, list of labels or labels
specs = _tuple_to_mi_locs(labels,key)
g = _spec_to_array_indices(labels, specs)
locs = labels.get_locs(key)
g = labels.locs_to_indexer(locs)
return self.obj.iloc[g]
else:
self._has_valid_type(key, axis)
Expand Down Expand Up @@ -1571,176 +1568,3 @@ def _maybe_droplevels(index, key):

return index

def _tuple_to_mi_locs(ix, tup):
    """Convert a tuple of slices/label lists/labels to a level-wise spec.

    Parameters
    ----------
    ix : a sufficiently lexsorted, unique/non-dupe MultiIndex.
    tup : a tuple of slices, labels or lists of labels.
        slice(None) is acceptable, and the case of len(tup) < ix.nlevels
        will have labels from trailing levels included.

    Returns
    -------
    a list containing ix.nlevels elements of either:
        - a 2-tuple representing a (start, stop) slice
        or
        - a list of label positions.

    The positions are relative to the labels of the corresponding level,
    not to the entire unrolled index.

    Example (This is *not* a doctest):
    >>> mi = pd.MultiIndex.from_product([['A0', 'A1', 'A2'], ['B0', 'B1']])
    >>> for x in mi.get_values(): print(x)
    ('A0', 'B0')
    ('A0', 'B1')
    ('A1', 'B0')
    ('A1', 'B1')
    ('A2', 'B0')
    ('A2', 'B1')
    >>> _tuple_to_mi_locs(mi, (slice('A0', 'A2'), ['B0', 'B1']))
    [(0, 2), [0, 1]]

    read as:
    - All labels in position [0, 2) in the first level
    - for each of those, all labels at positions 0 or 1.

    The same effective result can be achieved by specifying the None slice,
    or omitting it completely. Note the tuple (0, 2) has replaced the list
    [0, 1], but the outcome is the same.

    >>> _tuple_to_mi_locs(mi, (slice('A0', 'A2'), slice(None)))
    [(0, 2), (0, 2)]

    >>> _tuple_to_mi_locs(mi, (slice('A0', 'A2'),))
    [(0, 2), (0, 2)]

    """

    # ix must be lexsorted to at least as many levels
    # as there are elements in `tup`, and free of duplicates
    assert ix.is_lexsorted_for_tuple(tup)
    assert ix.is_unique
    assert isinstance(ix, MultiIndex)

    ranges = []

    for i, k in enumerate(tup):
        level = ix.levels[i]

        if _is_list_like(k):
            # a collection of labels to include from this level
            ranges.append([level.get_loc(x) for x in k])
        elif isinstance(k, slice):
            # a (possibly open-ended) label range; an omitted bound
            # means "from the beginning" / "to the end".
            # NOTE: the stop label is treated as non-inclusive, and
            # k.step is ignored.
            start = 0 if k.start is None else level.get_loc(k.start)
            stop = len(level) if k.stop is None else level.get_loc(k.stop)
            ranges.append((start, stop))
        else:
            # a single label; use (start, start + 1) so that the
            # downstream half-open range actually includes it
            # ((start, start) would select nothing)
            start = level.get_loc(k)
            ranges.append((start, start + 1))

    # omitting trailing dims means include all values from those levels
    # (iterate from len(tup) so that an empty tup is also handled)
    for j in range(len(tup), len(ix.levels)):
        ranges.append((0, len(ix.levels[j])))

    return ranges

def _spec_to_array_indices(ix, specs):
    """Take a location specification to an indexer.

    Parameters
    ----------
    ix : a sufficiently lexsorted, unique/non-dupe MultiIndex.
    specs : a list of 2-tuples/lists of label positions. Specifically,
        the output of _tuple_to_mi_locs. len(specs) must match ix.nlevels.

    Returns
    -------
    a flat ndarray of row positions relative to ix, corresponding to
    specs. Suitable for usage with `iloc`.

    Raises
    ------
    TypeError
        If an element of specs is neither a tuple nor a list.

    Example (This is *not* a doctest):
    >>> mi = pd.MultiIndex.from_product([['A0', 'A1', 'A2'], ['B0', 'B1']])
    >>> for x in mi.get_values(): print(x)
    ('A0', 'B0')
    ('A0', 'B1')
    ('A1', 'B0')
    ('A1', 'B1')
    ('A2', 'B0')
    ('A2', 'B1')

    >>> specs = _tuple_to_mi_locs(mi, (slice('A0', 'A2'), ['B0', 'B1']))
    >>> list(_spec_to_array_indices(mi, specs))
    [0, 1, 2, 3]

    Which are all the labels having 'A0' to 'A2' (non-inclusive) at
    level 0 and 'B0' or 'B1' at level 1.

    """
    assert ix.is_lexsorted_for_tuple(specs)
    assert len(specs) == ix.nlevels
    assert ix.is_unique
    assert isinstance(ix, MultiIndex)

    # step size/increment for iteration at each level: the number of
    # unrolled rows spanned by one label of that level
    giant_steps = np.cumprod(ix.levshape[::-1])[::-1]
    giant_steps[:-1] = giant_steps[1:]
    giant_steps[-1] = 1

    def _positions(spec):
        # expand one level's spec into its label positions
        if isinstance(spec, tuple):
            # tuples are 2-tuples of (start, stop) label indices to include
            return compat.range(*spec)
        if isinstance(spec, list):
            # lists are discrete label indices to include
            return spec
        # previously an unrecognized spec fell through to an
        # UnboundLocalError; raise something meaningful instead
        raise TypeError('spec must be a 2-tuple or a list of label '
                        'positions, got %r' % type(spec))

    def _iter_vectorize(specs, i=0):
        # recursively build the cartesian row positions for levels i..n
        step_size = giant_steps[i]
        valrange = _positions(specs[i])

        if len(specs) - 1 == i:
            # innermost level: positions are the offsets themselves
            return np.array(valrange)

        # tile the inner-level offsets once per label at this level,
        # shifting each copy by that label's base offset
        tmpl = np.array([v for v in _iter_vectorize(specs, i + 1)])
        res = np.tile(tmpl, (len(valrange), 1))
        steps = (np.array(valrange) * step_size).reshape((len(valrange), 1))
        return (res + steps).flatten()

    return _iter_vectorize(specs)