From c6335792f187b2904c2fdd77662d1048dde631cc Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 12 Jul 2019 09:09:00 -0700 Subject: [PATCH] CLN/REF: indexing typing, prune unreachable branches (#27351) --- pandas/core/frame.py | 75 ++++++---------- pandas/core/generic.py | 5 +- pandas/core/indexing.py | 188 ++++++++++++++++------------------------ pandas/core/series.py | 29 ++----- 4 files changed, 114 insertions(+), 183 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 263c4013de281..53cb0cedc208b 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2889,11 +2889,11 @@ def _set_value(self, index, col, value, takeable=False): _set_value.__doc__ = set_value.__doc__ - def _ixs(self, i, axis=0): + def _ixs(self, i: int, axis: int = 0): """ Parameters ---------- - i : int, slice, or sequence of integers + i : int axis : int Notes @@ -2902,59 +2902,40 @@ def _ixs(self, i, axis=0): """ # irow if axis == 0: - if isinstance(i, slice): - return self[i] - else: - label = self.index[i] - if isinstance(label, Index): - # a location index by definition - result = self.take(i, axis=axis) - copy = True - else: - new_values = self._data.fast_xs(i) - if is_scalar(new_values): - return new_values - - # if we are a copy, mark as such - copy = ( - isinstance(new_values, np.ndarray) and new_values.base is None - ) - result = self._constructor_sliced( - new_values, - index=self.columns, - name=self.index[i], - dtype=new_values.dtype, - ) - result._set_is_copy(self, copy=copy) - return result + label = self.index[i] + new_values = self._data.fast_xs(i) + if is_scalar(new_values): + return new_values + + # if we are a copy, mark as such + copy = isinstance(new_values, np.ndarray) and new_values.base is None + result = self._constructor_sliced( + new_values, + index=self.columns, + name=self.index[i], + dtype=new_values.dtype, + ) + result._set_is_copy(self, copy=copy) + return result # icol else: label = self.columns[i] - if isinstance(i, slice): - # need to return view - lab_slice = slice(label[0], label[-1]) - return self.loc[:, lab_slice] - else: - if isinstance(label, Index): - return self.take(i, axis=1) - index_len = len(self.index) + # if the values returned are not the same length + # as the index (iow a not found value), iget returns + # a 0-len ndarray. This is effectively catching + # a numpy error (as numpy should really raise) + values = self._data.iget(i) - # if the values returned are not the same length - # as the index (iow a not found value), iget returns - # a 0-len ndarray. This is effectively catching - # a numpy error (as numpy should really raise) - values = self._data.iget(i) + if len(self.index) and not len(values): + values = np.array([np.nan] * len(self.index), dtype=object) + result = self._box_col_values(values, label) - if index_len and not len(values): - values = np.array([np.nan] * index_len, dtype=object) - result = self._box_col_values(values, label) + # this is a cached value, mark it so + result._set_as_cached(label, self) - # this is a cached value, mark it so - result._set_as_cached(label, self) - - return result + return result def __getitem__(self, key): key = lib.item_from_zerodim(key) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index e19b1f70ce2f7..f28f58b070368 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3495,7 +3495,7 @@ def __delitem__(self, key): deleted = False maybe_shortcut = False - if hasattr(self, "columns") and isinstance(self.columns, MultiIndex): + if self.ndim == 2 and isinstance(self.columns, MultiIndex): try: maybe_shortcut = key not in self.columns._engine except TypeError: @@ -5231,9 +5231,6 @@ def _dir_additions(self): } return super()._dir_additions().union(additions) - # ---------------------------------------------------------------------- - # Getting and setting elements - # ---------------------------------------------------------------------- # Consolidation of internals diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 6040385acbe40..482e9c365420c 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -143,10 +143,7 @@ def __getitem__(self, key): key = com.apply_if_callable(key, self.obj) return self._getitem_axis(key, axis=axis) - def _get_label(self, label, axis=None): - if axis is None: - axis = self.axis or 0 - + def _get_label(self, label, axis: int): if self.ndim == 1: # for perf reasons we want to try _xs first # as its basically direct indexing @@ -158,12 +155,10 @@ def _get_label(self, label, axis=None): return self.obj._xs(label, axis=axis) - def _get_loc(self, key, axis: int): + def _get_loc(self, key: int, axis: int): return self.obj._ixs(key, axis=axis) - def _slice(self, obj, axis=None, kind=None): - if axis is None: - axis = self.axis + def _slice(self, obj, axis: int, kind=None): return self.obj._slice(obj, axis=axis, kind=kind) def _get_setitem_indexer(self, key): @@ -330,19 +325,6 @@ def _setitem_with_indexer(self, indexer, value): val = list(value.values()) if isinstance(value, dict) else value take_split_path = not blk._can_hold_element(val) - if isinstance(indexer, tuple) and len(indexer) == len(self.obj.axes): - - for i, ax in zip(indexer, self.obj.axes): - - # if we have any multi-indexes that have non-trivial slices - # (not null slices) then we must take the split path, xref - # GH 10360 - if isinstance(ax, MultiIndex) and not ( - is_integer(i) or com.is_null_slice(i) - ): - take_split_path = True - break - if isinstance(indexer, tuple): nindexer = [] for i, idx in enumerate(indexer): @@ -406,71 +388,16 @@ def _setitem_with_indexer(self, indexer, value): indexer, missing = convert_missing_indexer(indexer) if missing: - - # reindex the axis to the new value - # and set inplace - if self.ndim == 1: - index = self.obj.index - new_index = index.insert(len(index), indexer) - - # we have a coerced indexer, e.g. a float - # that matches in an Int64Index, so - # we will not create a duplicate index, rather - # index to that element - # e.g. 0.0 -> 0 - # GH12246 - if index.is_unique: - new_indexer = index.get_indexer([new_index[-1]]) - if (new_indexer != -1).any(): - return self._setitem_with_indexer(new_indexer, value) - - # this preserves dtype of the value - new_values = Series([value])._values - if len(self.obj._values): - # GH#22717 handle casting compatibility that np.concatenate - # does incorrectly - new_values = _concat_compat([self.obj._values, new_values]) - self.obj._data = self.obj._constructor( - new_values, index=new_index, name=self.obj.name - )._data - self.obj._maybe_update_cacher(clear=True) - return self.obj - - elif self.ndim == 2: - - # no columns and scalar - if not len(self.obj.columns): - raise ValueError( - "cannot set a frame with no defined " "columns" - ) - - # append a Series - if isinstance(value, Series): - - value = value.reindex(index=self.obj.columns, copy=True) - value.name = indexer - - # a list-list - else: - - # must have conforming columns - if is_list_like_indexer(value): - if len(value) != len(self.obj.columns): - raise ValueError( - "cannot set a row with " "mismatched columns" - ) - - value = Series(value, index=self.obj.columns, name=indexer) - - self.obj._data = self.obj.append(value)._data - self.obj._maybe_update_cacher(clear=True) - return self.obj + return self._setitem_with_indexer_missing(indexer, value) # set item_labels = self.obj._get_axis(info_axis) # align and set the values if take_split_path: + # Above we only set take_split_path to True for 2D cases + assert self.ndim == 2 + assert info_axis == 1 if not isinstance(indexer, tuple): indexer = self._tuplify(indexer) @@ -524,11 +451,8 @@ def _setitem_with_indexer(self, indexer, value): # non-mi else: plane_indexer = indexer[:info_axis] + indexer[info_axis + 1 :] - if info_axis > 0: - plane_axis = self.obj.axes[:info_axis][0] - lplane_indexer = length_of_indexer(plane_indexer[0], plane_axis) - else: - lplane_indexer = 0 + plane_axis = self.obj.axes[:info_axis][0] + lplane_indexer = length_of_indexer(plane_indexer[0], plane_axis) def setter(item, v): s = self.obj[item] @@ -578,9 +502,7 @@ def setter(item, v): # hasattr first, to avoid coercing to ndarray without reason. # But we may be relying on the ndarray coercion to check ndim. # Why not just convert to an ndarray earlier on if needed? - elif (hasattr(value, "ndim") and value.ndim == 2) or ( - not hasattr(value, "ndim") and np.array(value).ndim - ) == 2: + elif np.ndim(value) == 2: # note that this coerces the dtype if we are mixed # GH 7551 @@ -656,6 +578,65 @@ def setter(item, v): self.obj._data = self.obj._data.setitem(indexer=indexer, value=value) self.obj._maybe_update_cacher(clear=True) + def _setitem_with_indexer_missing(self, indexer, value): + """ + Insert new row(s) or column(s) into the Series or DataFrame. + """ + from pandas import Series + + # reindex the axis to the new value + # and set inplace + if self.ndim == 1: + index = self.obj.index + new_index = index.insert(len(index), indexer) + + # we have a coerced indexer, e.g. a float + # that matches in an Int64Index, so + # we will not create a duplicate index, rather + # index to that element + # e.g. 0.0 -> 0 + # GH#12246 + if index.is_unique: + new_indexer = index.get_indexer([new_index[-1]]) + if (new_indexer != -1).any(): + return self._setitem_with_indexer(new_indexer, value) + + # this preserves dtype of the value + new_values = Series([value])._values + if len(self.obj._values): + # GH#22717 handle casting compatibility that np.concatenate + # does incorrectly + new_values = _concat_compat([self.obj._values, new_values]) + self.obj._data = self.obj._constructor( + new_values, index=new_index, name=self.obj.name + )._data + self.obj._maybe_update_cacher(clear=True) + return self.obj + + elif self.ndim == 2: + + if not len(self.obj.columns): + # no columns and scalar + raise ValueError("cannot set a frame with no defined columns") + + if isinstance(value, ABCSeries): + # append a Series + value = value.reindex(index=self.obj.columns, copy=True) + value.name = indexer + + else: + # a list-list + if is_list_like_indexer(value): + # must have conforming columns + if len(value) != len(self.obj.columns): + raise ValueError("cannot set a row with mismatched columns") + + value = Series(value, index=self.obj.columns, name=indexer) + + self.obj._data = self.obj.append(value)._data + self.obj._maybe_update_cacher(clear=True) + return self.obj + def _align_series(self, indexer, ser, multiindex_indexer=False): """ Parameters @@ -820,9 +801,6 @@ def _getitem_tuple(self, tup): # no shortcut needed retval = self.obj for i, key in enumerate(tup): - if i >= self.obj.ndim: - raise IndexingError("Too many indexers") - if com.is_null_slice(key): continue @@ -882,10 +860,10 @@ def _convert_for_reindex(self, key, axis: int): def _handle_lowerdim_multi_index_axis0(self, tup): # we have an axis0 multi-index, handle or raise - + axis = self.axis or 0 try: # fast path for series or for tup devoid of slices - return self._get_label(tup, axis=self.axis) + return self._get_label(tup, axis=axis) except TypeError: # slices are unhashable pass @@ -983,7 +961,8 @@ def _getitem_nested_tuple(self, tup): # this is a series with a multi-index specified a tuple of # selectors - return self._getitem_axis(tup, axis=self.axis) + axis = self.axis or 0 + return self._getitem_axis(tup, axis=axis) # handle the multi-axis by taking sections and reducing # this is iterative @@ -1010,11 +989,7 @@ def _getitem_nested_tuple(self, tup): return obj - def _getitem_axis(self, key, axis=None): - - if axis is None: - axis = self.axis or 0 - + def _getitem_axis(self, key, axis: int): if is_iterator(key): key = list(key) self._validate_key(key, axis) @@ -1439,7 +1414,7 @@ def _is_scalar_access(self, key): def _getitem_scalar(self, key): raise NotImplementedError() - def _getitem_axis(self, key, axis=None): + def _getitem_axis(self, key, axis: int): raise NotImplementedError() def _getbool_axis(self, key, axis: int): @@ -1786,10 +1761,7 @@ def _get_partial_string_timestamp_match_key(self, key, labels): return key - def _getitem_axis(self, key, axis=None): - if axis is None: - axis = self.axis or 0 - + def _getitem_axis(self, key, axis: int): key = item_from_zerodim(key) if is_iterator(key): key = list(key) @@ -2106,9 +2078,6 @@ def _getitem_tuple(self, tup): retval = self.obj axis = 0 for i, key in enumerate(tup): - if i >= self.obj.ndim: - raise IndexingError("Too many indexers") - if com.is_null_slice(key): axis += 1 continue @@ -2143,10 +2112,7 @@ def _get_list_axis(self, key, axis: int): # re-raise with different error message raise IndexError("positional indexers are out-of-bounds") - def _getitem_axis(self, key, axis=None): - if axis is None: - axis = self.axis or 0 - + def _getitem_axis(self, key, axis: int): if isinstance(key, slice): return self._get_slice_axis(key, axis=axis) diff --git a/pandas/core/series.py b/pandas/core/series.py index 73a71a2a41f4c..6a58b1ea6f82d 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1028,38 +1028,25 @@ def axes(self): """ return [self.index] - def _ixs(self, i, axis=0): + def _ixs(self, i: int, axis: int = 0): """ Return the i-th value or values in the Series by location. Parameters ---------- - i : int, slice, or sequence of integers + i : int Returns ------- scalar (int) or Series (slice, sequence) """ - try: - # dispatch to the values if we need - values = self._values - if isinstance(values, np.ndarray): - return libindex.get_value_at(values, i) - else: - return values[i] - except IndexError: - raise - except Exception: - if isinstance(i, slice): - indexer = self.index._convert_slice_indexer(i, kind="iloc") - return self._get_values(indexer) - else: - label = self.index[i] - if isinstance(label, Index): - return self.take(i, axis=axis, convert=True) - else: - return libindex.get_value_at(self, i) + # dispatch to the values if we need + values = self._values + if isinstance(values, np.ndarray): + return libindex.get_value_at(values, i) + else: + return values[i] @property def _is_mixed_type(self):