API: iat/iloc will raise if enlargement is specified, as it is ambiguous

CLN: set_value in Series/Frame now goes through the indexing routines in core/indexing.py
pandas-dev · jreback · Sep 5, 2013 · Aug 8, 2013 · Aug 24, 2013 · Sep 5, 2013
commit 00b6c8990bbc906171f17148649a88717f12c930
diff --git a/doc/source/indexing.rst b/doc/source/indexing.rst
@@ -412,40 +412,14 @@ Pandas will detect this and raise ``IndexError``, rather than return an empty st
     >>> df.iloc[:,3:6]
     IndexError: out-of-bounds on slice (end)
 
-.. _indexing.basics.get_value:
-
-Fast scalar value getting and setting
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-Since indexing with ``[]`` must handle a lot of cases (single-label access,
-slicing, boolean indexing, etc.), it has a bit of overhead in order to figure
-out what you're asking for. If you only want to access a scalar value, the
-fastest way is to use the ``at`` and ``iat`` methods, which are implemented on
-all of the data structures.
-
-Similary to ``loc``, ``at`` provides **label** based scalar lookups, while, ``iat`` provides **integer** based lookups analagously to ``iloc``
-
-.. ipython:: python
-
-   s.iat[5]
-   df.at[dates[5], 'A']
-   df.iat[3, 0]
-
-You can also set using these same indexers. These have the additional
-capability of enlarging an object. This method *always* returns a reference to
-the object it modified, which in the case of enlargement, will be a **new object**:
-
-.. ipython:: python
-
-   df.at[dates[5], 'E'] = 7
-   df.iat[3, 0] = 7
-
 .. _indexing.basics.partial_setting:
 
 Setting With Enlargement
 ~~~~~~~~~~~~~~~~~~~~~~~~
 
-The ``.loc/.iloc/[]`` operations can perform enlargement when setting a non-existant key for that axis.
+.. versionadded:: 0.13
+
+The ``.loc/.ix/[]`` operations can perform enlargement when setting a non-existent key for that axis.
 
 In the ``Series`` case this is effectively an appending operation
 
@@ -473,6 +447,38 @@ This is like an ``append`` operation on the ``DataFrame``.
    dfi.loc[3] = 5
    dfi
 
+.. _indexing.basics.get_value:
+
+Fast scalar value getting and setting
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Since indexing with ``[]`` must handle a lot of cases (single-label access,
+slicing, boolean indexing, etc.), it has a bit of overhead in order to figure
+out what you're asking for. If you only want to access a scalar value, the
+fastest way is to use the ``at`` and ``iat`` methods, which are implemented on
+all of the data structures.
+
+Similarly to ``loc``, ``at`` provides **label** based scalar lookups, while ``iat`` provides **integer** based lookups analogously to ``iloc``.
+
+.. ipython:: python
+
+   s.iat[5]
+   df.at[dates[5], 'A']
+   df.iat[3, 0]
+
+You can also set using these same indexers.
+
+.. ipython:: python
+
+   df.at[dates[5], 'E'] = 7
+   df.iat[3, 0] = 7
+
+``at`` may enlarge the object in-place as above if the indexer is missing.
+
+.. ipython:: python
+
+   df.at[6, 0] = 7
+   df
 
 Boolean indexing
 ~~~~~~~~~~~~~~~~

diff --git a/doc/source/release.rst b/doc/source/release.rst
@@ -92,6 +92,9 @@ pandas 0.13
     an alias of iteritems used to get around ``2to3``'s changes).
     (:issue:`4384`, :issue:`4375`, :issue:`4372`)
   - ``Series.get`` with negative indexers now returns the same as ``[]`` (:issue:`4390`)
+  - allow ``ix/loc`` for Series/DataFrame/Panel to set on any axis even when the single-key is not currently contained in
+    the index for that axis (:issue:`2578`)
+  - ``at`` now will enlarge the object inplace (and return the same) (:issue:`2578`)
 
   - ``HDFStore``
 
@@ -123,8 +126,6 @@ pandas 0.13
 
     - added ``date_unit`` parameter to specify resolution of timestamps. Options
       are seconds, milliseconds, microseconds and nanoseconds. (:issue:`4362`, :issue:`4498`).
-    - allow ``ix/loc/iloc`` for Series/DataFrame/Panel to set on any axis even when the single-key is not currently contained in
-      the index for that axis (:issue:`2578`)
 
   - ``Index`` and ``MultiIndex`` changes (:issue:`4039`):
 

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -1731,18 +1731,12 @@ def set_value(self, index, col, value):
             engine.set_value(series.values, index, value)
             return self
         except KeyError:
-            new_index, new_columns = self._expand_axes((index, col))
-            result = self.reindex(index=new_index, columns=new_columns,
-                                  copy=False)
-            likely_dtype, value = _infer_dtype_from_scalar(value)
 
-            made_bigger = not np.array_equal(new_columns, self.columns)
+            # set using a non-recursive method & reset the cache
+            self.loc[index,col] = value
+            self._item_cache.pop(col,None)
 
-            # how to make this logic simpler?
-            if made_bigger:
-                com._possibly_cast_item(result, col, likely_dtype)
-
-            return result.set_value(index, col, value)
+            return self
 
     def irow(self, i, copy=False):
         return self._ixs(i, axis=0)

diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
@@ -102,12 +102,41 @@ def _convert_tuple(self, key, is_setter=False):
             keyidx.append(idx)
         return tuple(keyidx)
 
+    def _has_valid_setitem_indexer(self, indexer):
+        return True  # default hook: accept any setitem indexer (same-named hooks later in this patch run the positional check)
+
+    def _has_valid_positional_setitem_indexer(self, indexer):
+        """ validate that a positional indexer cannot enlarge its target;
+            raises IndexError if it would, otherwise returns True; does not modify the indexer externally """
+        if isinstance(indexer, dict):  # a dict indexer is the marker for a missing key, i.e. an enlargement request
+            raise IndexError("{0} cannot enlarge its target object".format(self.name))
+        else:
+            if not isinstance(indexer, tuple):
+                indexer = self._tuplify(indexer)
+            for ax, i in zip(self.obj.axes,indexer):
+                if isinstance(i, slice):
+                    # TODO: slices are not validated here (should the stop be checked?)
+                    pass
+                elif is_list_like(i):
+                    # TODO: list-likes are not validated here (should each element be checked?)
+                    pass
+                elif com.is_integer(i):
+                    if i >= len(ax):  # only the upper bound is checked; negative positions pass through
+                        raise IndexError("{0} cannot enlarge its target object".format(self.name))
+                elif isinstance(i, dict):
+                    raise IndexError("{0} cannot enlarge its target object".format(self.name))
+
+        return True
+
     def _setitem_with_indexer(self, indexer, value):
 
+        self._has_valid_setitem_indexer(indexer)
+
         # also has the side effect of consolidating in-place
         from pandas import Panel, DataFrame, Series
 
         # maybe partial set
+        take_split_path = self.obj._is_mixed_type
         if isinstance(indexer,tuple):
             nindexer = []
             for i, idx in enumerate(indexer):
@@ -116,10 +145,26 @@ def _setitem_with_indexer(self, indexer, value):
                     # reindex the axis to the new value
                     # and set inplace
                     key,_ = _convert_missing_indexer(idx)
-                    labels = self.obj._get_axis(i) + Index([key])
+
+                    # if this is the items axes, then take the main missing path
+                    # first; this correctly sets the dtype and avoids cache issues
+                    # essentially this separates out the block that is needed to possibly
+                    # be modified
+                    if self.ndim > 1 and i == self.obj._info_axis_number:
+
+                        # add the new item, and set the value
+                        new_indexer = _convert_from_missing_indexer_tuple(indexer)
+                        self.obj[key] = np.nan
+                        self.obj.loc[new_indexer] = value
+                        return self.obj
+
+                    # reindex the axis
+                    index = self.obj._get_axis(i)
+                    labels = _safe_append_to_index(index, key)
                     self.obj._data = self.obj.reindex_axis(labels,i)._data
 
                     nindexer.append(labels.get_loc(key))
+
                 else:
                     nindexer.append(idx)
 
@@ -133,11 +178,19 @@ def _setitem_with_indexer(self, indexer, value):
                 # reindex the axis to the new value
                 # and set inplace
                 if self.ndim == 1:
-                    self.obj._data = self.obj.append(Series(value,index=[indexer]))._data
-                    return
+                    index = self.obj.index
+                    if len(index) == 0:
+                        new_index = Index([indexer])
+                    else:
+                        new_index = _safe_append_to_index(index, indexer)
+
+                    new_values = np.concatenate([self.obj.values, [value]])
+                    self.obj._data = self.obj._constructor(new_values, index=new_index, name=self.obj.name)._data  # take the internals, as the reindex branches do
+                    return self.obj
 
                 elif self.ndim == 2:
-                    labels = self.obj._get_axis(0) + Index([indexer])
+                    index = self.obj._get_axis(0)
+                    labels = _safe_append_to_index(index, indexer)
                     self.obj._data = self.obj.reindex_axis(labels,0)._data
                     return getattr(self.obj,self.name).__setitem__(indexer,value)
 
@@ -146,7 +199,7 @@ def _setitem_with_indexer(self, indexer, value):
                     return self.obj.__setitem__(indexer,value)
 
         # align and set the values
-        if self.obj._is_mixed_type:
+        if take_split_path:
             if not isinstance(indexer, tuple):
                 indexer = self._tuplify(indexer)
 
@@ -732,6 +785,10 @@ def _convert_to_indexer(self, obj, axis=0, is_setter=False):
 
                 mask = check == -1
                 if mask.any():
+
+                    # mi here
+                    if isinstance(obj, tuple) and is_setter:
+                        return { 'key' : obj }
                     raise KeyError('%s not in index' % objarr[mask])
 
                 return indexer
@@ -742,7 +799,7 @@ def _convert_to_indexer(self, obj, axis=0, is_setter=False):
             except (KeyError):
 
                 # allow a not found key only if we are a setter
-                if np.isscalar(obj) and is_setter:
+                if not is_list_like(obj) and is_setter:
                     return { 'key' : obj }
                 raise
 
@@ -933,6 +990,9 @@ def _has_valid_type(self, key, axis):
 
         return isinstance(key, slice) or com.is_integer(key) or _is_list_like(key)
 
+    def _has_valid_setitem_indexer(self, indexer):
+        self._has_valid_positional_setitem_indexer(indexer)  # raises IndexError on enlargement; the True result is not returned
+
     def _getitem_tuple(self, tup):
 
         self._has_valid_tuple(tup)
@@ -965,7 +1025,6 @@ def _get_slice_axis(self, slice_obj, axis=0):
             return self.obj.take(slice_obj, axis=axis)
 
     def _getitem_axis(self, key, axis=0):
-
         if isinstance(key, slice):
             self._has_valid_type(key,axis)
             return self._get_slice_axis(key, axis=axis)
@@ -1005,14 +1064,12 @@ def __getitem__(self, key):
             else:
                 raise ValueError('Invalid call for scalar access (getting)!')
 
-        if len(key) != self.obj.ndim:
-            raise ValueError('Not enough indexers for scalar access (getting)!')
         key = self._convert_key(key)
         return self.obj.get_value(*key)
 
     def __setitem__(self, key, value):
         if not isinstance(key, tuple):
-            raise ValueError('Invalid call for scalar access (setting)!')
+            key = self._tuplify(key)
         if len(key) != self.obj.ndim:
             raise ValueError('Not enough indexers for scalar access (setting)!')
         key = self._convert_key(key)
@@ -1026,6 +1083,9 @@ class _AtIndexer(_ScalarAccessIndexer):
 class _iAtIndexer(_ScalarAccessIndexer):
     """ integer based scalar accessor """
 
+    def _has_valid_setitem_indexer(self, indexer):
+        self._has_valid_positional_setitem_indexer(indexer)  # raises IndexError on enlargement; the True result is not returned
+
     def _convert_key(self, key):
         """ require  integer args (and convert to label arguments) """
         ckey = []
@@ -1179,6 +1239,19 @@ def _convert_missing_indexer(indexer):
 
     return indexer, False
 
+def _convert_from_missing_indexer_tuple(indexer):
+    """ return indexer as a tuple in which every missing-key marker (a dict) is replaced by its 'key' entry """
+    def _unwrap(entry):
+        return entry['key'] if isinstance(entry, dict) else entry
+    return tuple(_unwrap(entry) for entry in indexer)
+
+def _safe_append_to_index(index, key):
+    """ append key to the end of index; if insert fails (e.g. incompatible type), rebuild the Index from object values """
+    try:
+        return index.insert(len(index), key)
+    except Exception:  # not a bare except: KeyboardInterrupt/SystemExit must still propagate
+        return Index(np.concatenate([index.asobject.values,np.array([key])]))
+
 def _maybe_convert_indices(indices, n):
     """ if we have negative indicies, translate to postive here
         if have indicies that are out-of-bounds, raise an IndexError """

diff --git a/pandas/core/internals.py b/pandas/core/internals.py
@@ -9,7 +9,7 @@
 
 from pandas.core.common import (_possibly_downcast_to_dtype, isnull, notnull,
                                 _NS_DTYPE, _TD_DTYPE, ABCSeries, ABCSparseSeries,
-                                is_list_like)
+                                is_list_like, _infer_dtype_from_scalar)
 from pandas.core.index import (Index, MultiIndex, _ensure_index,
                                _handle_legacy_indexes)
 from pandas.core.indexing import _check_slice_bounds, _maybe_convert_indices
@@ -460,6 +460,24 @@ def _try_cast_result(self, result, dtype=None):
         if self.is_integer or self.is_bool or self.is_datetime:
             pass
         elif self.is_float and result.dtype == self.dtype:
+
+            # protect against a bool/object showing up here
+            if isinstance(dtype,compat.string_types) and dtype == 'infer':
+                return result
+            if not isinstance(dtype,type):
+                dtype = dtype.type
+            if issubclass(dtype,(np.bool_,np.object_)):
+                if issubclass(dtype,np.bool_):
+                    if isnull(result).all():
+                        return result.astype(np.bool_)
+                    else:
+                        result = result.astype(np.object_)
+                        result[result==1] = True
+                        result[result==0] = False
+                        return result
+                else:
+                    return result.astype(np.object_)
+
             return result
 
         # may need to change the dtype here
@@ -536,8 +554,12 @@ def setitem(self, indexer, value):
             values[indexer] = value
 
             # coerce and try to infer the dtypes of the result
+            if np.isscalar(value):
+                dtype,_ = _infer_dtype_from_scalar(value)
+            else:
+                dtype = 'infer'
             values = self._try_coerce_result(values)
-            values = self._try_cast_result(values, 'infer')
+            values = self._try_cast_result(values, dtype)
             return [make_block(transf(values), self.items, self.ref_items, ndim=self.ndim, fastpath=True)]
         except:
             pass
@@ -902,7 +924,7 @@ def _can_hold_element(self, element):
         if is_list_like(element):
             element = np.array(element)
             return issubclass(element.dtype.type, (np.floating, np.integer))
-        return isinstance(element, (float, int))
+        return isinstance(element, (float, int, np.float_, np.int_)) and not isinstance(element, (bool, np.bool_))
 
     def _try_cast(self, element):
         try:

diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -1204,13 +1204,10 @@ def set_value(self, label, value):
             self.index._engine.set_value(self.values, label, value)
             return self
         except KeyError:
-            if len(self.index) == 0:
-                new_index = Index([label])
-            else:
-                new_index = self.index.insert(len(self), label)
 
-            new_values = np.concatenate([self.values, [value]])
-            return self._constructor(new_values, index=new_index, name=self.name)
+            # set using a non-recursive method
+            self.loc[label] = value
+            return self
 
     def reset_index(self, level=None, drop=False, name=None, inplace=False):
         """

diff --git a/pandas/sparse/tests/test_sparse.py b/pandas/sparse/tests/test_sparse.py
@@ -1075,6 +1075,7 @@ def test_icol(self):
                           type(iframe.icol(0).sp_index))
 
     def test_set_value(self):
+
         res = self.frame.set_value('foobar', 'B', 1.5)
         self.assert_(res is not self.frame)
         self.assert_(res.index[-1] == 'foobar')