issues with overlapping multi index intervals

Scenario 1: single-level indexing, which works fine:

    import pandas as pd # pandas version 0.25.0, python version: 3.6.6
    idx = pd.IntervalIndex.from_arrays([1,3,1,2],
                                 [3,4,2,4])
    df = pd.DataFrame({'Value':[1,2,3,4]},index=idx) 

which returns:

    df = 
              Value
    (1,3]   1
    (3,4]   2
    (1,2]   3
    (2,4]   4

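Query result: looking up a scalar returns every row whose interval contains it, even though the intervals overlap: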

    df.loc[1.5] = 
              Value
    (1,3]   1
    (1,2]   3

Scenario 2: Multi-level indexing:

    idx1 = pd.MultiIndex.from_arrays([
        pd.Index(['label1','label1','label2','label2']),
        pd.IntervalIndex.from_arrays([1,3,1,2],
                                 [3,4,2,4])
    ])
    idx2 = pd.MultiIndex.from_arrays([
        pd.Index(['label1','label1','label2','label2']),
        pd.IntervalIndex.from_arrays([1,2,1,2],
                                 [2,4,2,4])
    ])
    df1 = pd.DataFrame({'Value':[1,2,3,4]},index=idx1) #with overlapping intervals 
    df2 = pd.DataFrame({'Value':[1,2,3,4]},index=idx2) #without overlapping intervals

which returns:

    df1 = 
                        Value
    label1    (1,3]   1
    label1    (3,4]   2
    label2    (1,2]   3
    label2    (2,4]   4
    df2 = 
                        Value
    label1    (1,2]   1
    label1    (2,4]   2
    label2    (1,2]   3
    label2    (2,4]   4

Query method 1 (chained `.loc` calls): works on both df1 and df2, but is slow:

    df1.Value.loc['label1'].loc[1.5]
    1

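Query method 2 (a single `.loc` call with a tuple key): about 10 times faster than query method 1, but it works only on df2 (non-overlapping intervals); on df1 (overlapping intervals) it raises the `KeyError: 1.5` shown in the traceback below: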

    df2.Value.loc[('label1',1.5)]
    1
    df1.Value.loc[('label1',1.5)]

---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
C:\Program Files\ArcGIS\Pro\bin\Python\envs\arcgispro-py3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
   2889             try:
-> 2890                 return self._engine.get_loc(key)
   2891             except KeyError:

pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()

pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()

pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()

pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()

KeyError: 1.5

During handling of the above exception, another exception occurred:

KeyError                                  Traceback (most recent call last)
<ipython-input-12-ef83c1160165> in <module>()
     11 display(df)
     12 print(df.loc['label1'].loc[1.5])
---> 13 print(df.loc[('label1',1.5)])

C:\Program Files\ArcGIS\Pro\bin\Python\envs\arcgispro-py3\lib\site-packages\pandas\core\indexing.py in __getitem__(self, key)
   1402                 except (KeyError, IndexError, AttributeError):
   1403                     pass
-> 1404             return self._getitem_tuple(key)
   1405         else:
   1406             # we by definition only have the 0th axis

C:\Program Files\ArcGIS\Pro\bin\Python\envs\arcgispro-py3\lib\site-packages\pandas\core\indexing.py in _getitem_tuple(self, tup)
    789     def _getitem_tuple(self, tup):
    790         try:
--> 791             return self._getitem_lowerdim(tup)
    792         except IndexingError:
    793             pass

C:\Program Files\ArcGIS\Pro\bin\Python\envs\arcgispro-py3\lib\site-packages\pandas\core\indexing.py in _getitem_lowerdim(self, tup)
    945                     return section
    946                 # This is an elided recursive call to iloc/loc/etc'
--> 947                 return getattr(section, self.name)[new_key]
    948 
    949         raise IndexingError("not applicable")

C:\Program Files\ArcGIS\Pro\bin\Python\envs\arcgispro-py3\lib\site-packages\pandas\core\indexing.py in __getitem__(self, key)
   1402                 except (KeyError, IndexError, AttributeError):
   1403                     pass
-> 1404             return self._getitem_tuple(key)
   1405         else:
   1406             # we by definition only have the 0th axis

C:\Program Files\ArcGIS\Pro\bin\Python\envs\arcgispro-py3\lib\site-packages\pandas\core\indexing.py in _getitem_tuple(self, tup)
    789     def _getitem_tuple(self, tup):
    790         try:
--> 791             return self._getitem_lowerdim(tup)
    792         except IndexingError:
    793             pass

C:\Program Files\ArcGIS\Pro\bin\Python\envs\arcgispro-py3\lib\site-packages\pandas\core\indexing.py in _getitem_lowerdim(self, tup)
    913         for i, key in enumerate(tup):
    914             if is_label_like(key) or isinstance(key, tuple):
--> 915                 section = self._getitem_axis(key, axis=i)
    916 
    917                 # we have yielded a scalar ?

C:\Program Files\ArcGIS\Pro\bin\Python\envs\arcgispro-py3\lib\site-packages\pandas\core\indexing.py in _getitem_axis(self, key, axis)
   1823         # fall thru to straight lookup
   1824         self._validate_key(key, axis)
-> 1825         return self._get_label(key, axis=axis)
   1826 
   1827 

C:\Program Files\ArcGIS\Pro\bin\Python\envs\arcgispro-py3\lib\site-packages\pandas\core\indexing.py in _get_label(self, label, axis)
    155             raise IndexingError("no slices here, handle elsewhere")
    156 
--> 157         return self.obj._xs(label, axis=axis)
    158 
    159     def _get_loc(self, key: int, axis: int):

C:\Program Files\ArcGIS\Pro\bin\Python\envs\arcgispro-py3\lib\site-packages\pandas\core\generic.py in xs(self, key, axis, level, drop_level)
   3728 
   3729         if axis == 1:
-> 3730             return self[key]
   3731 
   3732         self._consolidate_inplace()

C:\Program Files\ArcGIS\Pro\bin\Python\envs\arcgispro-py3\lib\site-packages\pandas\core\frame.py in __getitem__(self, key)
   2973             if self.columns.nlevels > 1:
   2974                 return self._getitem_multilevel(key)
-> 2975             indexer = self.columns.get_loc(key)
   2976             if is_integer(indexer):
   2977                 indexer = [indexer]

C:\Program Files\ArcGIS\Pro\bin\Python\envs\arcgispro-py3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
   2890                 return self._engine.get_loc(key)
   2891             except KeyError:
-> 2892                 return self._engine.get_loc(self._maybe_cast_indexer(key))
   2893         indexer = self.get_indexer([key], method=method, tolerance=tolerance)
   2894         if indexer.ndim > 1 or indexer.size > 1:

pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()

pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()

pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()

pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()

KeyError: 1.5


Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Uh oh!

issues with overlapping multi index intervals #27456

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Uh oh!

issues with overlapping multi index intervals #27456

Description

Metadata

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Issue actions