Skip to content

ENH: per axis and per level indexing (orig GH6134) #6301

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Feb 13, 2014
Prev Previous commit
Next Next commit
CLN: move indexing loc changes to index.py
  • Loading branch information
jreback committed Feb 13, 2014
commit 30eb6dbbbe75cbbad36bd0d18bbb9563139bc009
173 changes: 173 additions & 0 deletions pandas/core/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -3249,6 +3249,179 @@ def _get_level_indexer(self, key, level=0):
j = labels.searchsorted(loc, side='right')
return slice(i, j)

def get_locs(self, tup):
    """Convert a tuple of slices/label lists/labels to a level-wise spec.

    Parameters
    ----------
    self : a sufficiently lexsorted, unique/non-dupe MultiIndex.
    tup : a tuple of slices, labels or lists of labels.
        slice(None) is acceptable, and the case of len(tup) < self.nlevels
        will have labels from trailing levels included.

    Returns
    -------
    a list containing self.nlevels elements of either:
        - a 2-tuple representing a (start, stop) slice
        or
        - a list of label positions.

    The positions are relative to the labels of the corresponding level,
    not to the entire unrolled index.

    Example (This is *not* a doctest):
    >>> mi = pd.MultiIndex.from_product([['A0', 'A1', 'A2'], ['B0', 'B1']])
    >>> for x in mi.get_values(): print(x)
    ('A0', 'B0')
    ('A0', 'B1')
    ('A1', 'B0')
    ('A1', 'B1')
    ('A2', 'B0')
    ('A2', 'B1')
    >>> mi.get_locs((slice('A0', 'A2'), ['B0', 'B1']))
    [(0, 2), [0, 1]]

    read as:
    - All labels in position [0, 2) in the first level
    - for each of those, all labels at positions 0 or 1.

    The same effective result can be achieved by specifying the None slice,
    or omitting it completely. Note the tuple (0, 2) has replaced the list
    [0, 1], but the outcome is the same.

    >>> mi.get_locs((slice('A0', 'A2'), slice(None)))
    [(0, 2), (0, 2)]

    >>> mi.get_locs((slice('A0', 'A2'),))
    [(0, 2), (0, 2)]

    """

    # self must be lexsorted to at least as many levels
    # as there are elements in `tup`, and free of duplicates
    assert self.is_lexsorted_for_tuple(tup)
    assert self.is_unique
    assert isinstance(self, MultiIndex)

    ranges = []

    for i, k in enumerate(tup):
        level = self.levels[i]

        if com.is_list_like(k):
            # a collection of labels to include from this level
            ranges.append([level.get_loc(x) for x in k])
        elif isinstance(k, slice):
            # a (possibly open-ended) label range; an omitted bound
            # means "from the beginning" / "to the end".
            # NOTE: the stop label is treated as non-inclusive, and
            # k.step is ignored.
            start = 0 if k.start is None else level.get_loc(k.start)
            stop = len(level) if k.stop is None else level.get_loc(k.stop)
            ranges.append((start, stop))
        else:
            # a single label; use (start, start + 1) so that the
            # downstream half-open range actually includes it
            # ((start, start) would select nothing)
            start = level.get_loc(k)
            ranges.append((start, start + 1))

    # omitting trailing dims means include all values from those levels
    # (iterate from len(tup) so that an empty tup is also handled)
    for j in range(len(tup), len(self.levels)):
        ranges.append((0, len(self.levels[j])))

    return ranges

def locs_to_indexer(self, specs):
    """Take a location specification to an indexer.

    Parameters
    ----------
    self : a sufficiently lexsorted, unique/non-dupe MultiIndex.
    specs : a list of 2-tuples/lists of label positions. Specifically,
        the output of ``get_locs``. len(specs) must match self.nlevels.

    Returns
    -------
    a flat ndarray of row positions relative to self, corresponding to
    specs. Suitable for usage with `iloc`.

    Raises
    ------
    TypeError
        If an element of specs is neither a tuple nor a list.

    Example (This is *not* a doctest):
    >>> mi = pd.MultiIndex.from_product([['A0', 'A1', 'A2'], ['B0', 'B1']])
    >>> for x in mi.get_values(): print(x)
    ('A0', 'B0')
    ('A0', 'B1')
    ('A1', 'B0')
    ('A1', 'B1')
    ('A2', 'B0')
    ('A2', 'B1')

    >>> locs = mi.get_locs((slice('A0', 'A2'), ['B0', 'B1']))
    >>> list(mi.locs_to_indexer(locs))
    [0, 1, 2, 3]

    Which are all the labels having 'A0' to 'A2' (non-inclusive) at
    level 0 and 'B0' or 'B1' at level 1.

    """
    assert self.is_lexsorted_for_tuple(specs)
    assert len(specs) == self.nlevels
    assert self.is_unique
    assert isinstance(self, MultiIndex)

    # step size/increment for iteration at each level: the number of
    # unrolled rows spanned by one label of that level
    giant_steps = np.cumprod(self.levshape[::-1])[::-1]
    giant_steps[:-1] = giant_steps[1:]
    giant_steps[-1] = 1

    def _positions(spec):
        # expand one level's spec into its label positions
        if isinstance(spec, tuple):
            # tuples are 2-tuples of (start, stop) label indices to include
            return compat.range(*spec)
        if isinstance(spec, list):
            # lists are discrete label indices to include
            return spec
        # previously an unrecognized spec fell through to an
        # UnboundLocalError; raise something meaningful instead
        raise TypeError('spec must be a 2-tuple or a list of label '
                        'positions, got %r' % type(spec))

    def _iter_vectorize(specs, i=0):
        # recursively build the cartesian row positions for levels i..n
        step_size = giant_steps[i]
        valrange = _positions(specs[i])

        if len(specs) - 1 == i:
            # innermost level: positions are the offsets themselves
            return np.array(valrange)

        # tile the inner-level offsets once per label at this level,
        # shifting each copy by that label's base offset
        tmpl = np.array([v for v in _iter_vectorize(specs, i + 1)])
        res = np.tile(tmpl, (len(valrange), 1))
        steps = (np.array(valrange) * step_size).reshape((len(valrange), 1))
        return (res + steps).flatten()

    return _iter_vectorize(specs)

def truncate(self, before=None, after=None):
"""
Slice index between two labels / tuples, return new MultiIndex
Expand Down
180 changes: 2 additions & 178 deletions pandas/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1181,11 +1181,8 @@ def _getitem_axis(self, key, axis=0):
return self._getitem_iterable(key, axis=axis)
elif isinstance(key, tuple) and isinstance(labels, MultiIndex) and \
any([isinstance(x,slice) for x in key]):
# handle per-axis tuple containting label criteria for
# each level (or a prefix of levels), may contain
# (None) slices, list of labels or labels
specs = _tuple_to_mi_locs(labels,key)
g = _spec_to_array_indices(labels, specs)
locs = labels.get_locs(key)
g = labels.locs_to_indexer(locs)
return self.obj.iloc[g]
else:
self._has_valid_type(key, axis)
Expand Down Expand Up @@ -1571,176 +1568,3 @@ def _maybe_droplevels(index, key):

return index

def _tuple_to_mi_locs(ix, tup):
    """Convert a tuple of slices/label lists/labels to a level-wise spec.

    Parameters
    ----------
    ix : a sufficiently lexsorted, unique/non-dupe MultiIndex.
    tup : a tuple of slices, labels or lists of labels.
        slice(None) is acceptable, and the case of len(tup) < ix.nlevels
        will have labels from trailing levels included.

    Returns
    -------
    a list containing ix.nlevels elements of either:
        - a 2-tuple representing a (start, stop) slice
        or
        - a list of label positions.

    The positions are relative to the labels of the corresponding level,
    not to the entire unrolled index.

    Example (This is *not* a doctest):
    >>> mi = pd.MultiIndex.from_product([['A0', 'A1', 'A2'], ['B0', 'B1']])
    >>> for x in mi.get_values(): print(x)
    ('A0', 'B0')
    ('A0', 'B1')
    ('A1', 'B0')
    ('A1', 'B1')
    ('A2', 'B0')
    ('A2', 'B1')
    >>> _tuple_to_mi_locs(mi, (slice('A0', 'A2'), ['B0', 'B1']))
    [(0, 2), [0, 1]]

    read as:
    - All labels in position [0, 2) in the first level
    - for each of those, all labels at positions 0 or 1.

    The same effective result can be achieved by specifying the None slice,
    or omitting it completely. Note the tuple (0, 2) has replaced the list
    [0, 1], but the outcome is the same.

    >>> _tuple_to_mi_locs(mi, (slice('A0', 'A2'), slice(None)))
    [(0, 2), (0, 2)]

    >>> _tuple_to_mi_locs(mi, (slice('A0', 'A2'),))
    [(0, 2), (0, 2)]

    """

    # ix must be lexsorted to at least as many levels
    # as there are elements in `tup`, and free of duplicates
    assert ix.is_lexsorted_for_tuple(tup)
    assert ix.is_unique
    assert isinstance(ix, MultiIndex)

    ranges = []

    for i, k in enumerate(tup):
        level = ix.levels[i]

        if _is_list_like(k):
            # a collection of labels to include from this level
            ranges.append([level.get_loc(x) for x in k])
        elif isinstance(k, slice):
            # a (possibly open-ended) label range; an omitted bound
            # means "from the beginning" / "to the end".
            # NOTE: the stop label is treated as non-inclusive, and
            # k.step is ignored.
            start = 0 if k.start is None else level.get_loc(k.start)
            stop = len(level) if k.stop is None else level.get_loc(k.stop)
            ranges.append((start, stop))
        else:
            # a single label; use (start, start + 1) so that the
            # downstream half-open range actually includes it
            # ((start, start) would select nothing)
            start = level.get_loc(k)
            ranges.append((start, start + 1))

    # omitting trailing dims means include all values from those levels
    # (iterate from len(tup) so that an empty tup is also handled)
    for j in range(len(tup), len(ix.levels)):
        ranges.append((0, len(ix.levels[j])))

    return ranges

def _spec_to_array_indices(ix, specs):
    """Take a location specification to an indexer.

    Parameters
    ----------
    ix : a sufficiently lexsorted, unique/non-dupe MultiIndex.
    specs : a list of 2-tuples/lists of label positions. Specifically,
        the output of _tuple_to_mi_locs. len(specs) must match ix.nlevels.

    Returns
    -------
    a flat ndarray of row positions relative to ix, corresponding to
    specs. Suitable for usage with `iloc`.

    Raises
    ------
    TypeError
        If an element of specs is neither a tuple nor a list.

    Example (This is *not* a doctest):
    >>> mi = pd.MultiIndex.from_product([['A0', 'A1', 'A2'], ['B0', 'B1']])
    >>> for x in mi.get_values(): print(x)
    ('A0', 'B0')
    ('A0', 'B1')
    ('A1', 'B0')
    ('A1', 'B1')
    ('A2', 'B0')
    ('A2', 'B1')

    >>> specs = _tuple_to_mi_locs(mi, (slice('A0', 'A2'), ['B0', 'B1']))
    >>> list(_spec_to_array_indices(mi, specs))
    [0, 1, 2, 3]

    Which are all the labels having 'A0' to 'A2' (non-inclusive) at
    level 0 and 'B0' or 'B1' at level 1.

    """
    assert ix.is_lexsorted_for_tuple(specs)
    assert len(specs) == ix.nlevels
    assert ix.is_unique
    assert isinstance(ix, MultiIndex)

    # step size/increment for iteration at each level: the number of
    # unrolled rows spanned by one label of that level
    giant_steps = np.cumprod(ix.levshape[::-1])[::-1]
    giant_steps[:-1] = giant_steps[1:]
    giant_steps[-1] = 1

    def _positions(spec):
        # expand one level's spec into its label positions
        if isinstance(spec, tuple):
            # tuples are 2-tuples of (start, stop) label indices to include
            return compat.range(*spec)
        if isinstance(spec, list):
            # lists are discrete label indices to include
            return spec
        # previously an unrecognized spec fell through to an
        # UnboundLocalError; raise something meaningful instead
        raise TypeError('spec must be a 2-tuple or a list of label '
                        'positions, got %r' % type(spec))

    def _iter_vectorize(specs, i=0):
        # recursively build the cartesian row positions for levels i..n
        step_size = giant_steps[i]
        valrange = _positions(specs[i])

        if len(specs) - 1 == i:
            # innermost level: positions are the offsets themselves
            return np.array(valrange)

        # tile the inner-level offsets once per label at this level,
        # shifting each copy by that label's base offset
        tmpl = np.array([v for v in _iter_vectorize(specs, i + 1)])
        res = np.tile(tmpl, (len(valrange), 1))
        steps = (np.array(valrange) * step_size).reshape((len(valrange), 1))
        return (res + steps).flatten()

    return _iter_vectorize(specs)