FIX-#2253: loc assignment fixed in case of (1, 1) shape frame (#2316)

dchigarev · web-flow · commit 5c5a8e024f5b · 2020-10-27T22:34:44.000+03:00
Signed-off-by: Dmitry Chigarev <dmitry.chigarev@intel.com>
diff --git a/modin/pandas/indexing.py b/modin/pandas/indexing.py
@@ -60,6 +60,26 @@ def is_slice(x):
     return isinstance(x, slice)
 
 
+def compute_sliced_len(slc, sequence_len):
+    """
+    Compute length of sliced object.
+
+    Parameters
+    ----------
+    slc: slice
+        Slice object
+    sequence_len: int
+        Length of sequence, to which slice will be applied
+
+    Returns
+    -------
+    int
+        Length of object after applying slice object on it.
+    """
+    # This will translate slice to a range, from which we can retrieve length
+    return len(range(*slc.indices(sequence_len)))
+
+
 def is_2d(x):
     """
     Implement [METHOD_NAME].
@@ -293,7 +313,7 @@ def __getitem__(self, row_lookup, col_lookup, ndim):
             )
         return self.df.__constructor__(query_compiler=qc_view).squeeze(axis=axis)
 
-    def __setitem__(self, row_lookup, col_lookup, item):
+    def __setitem__(self, row_lookup, col_lookup, item, axis=None):
         """
         Implement [METHOD_NAME].
 
@@ -317,15 +337,11 @@ def __setitem__(self, row_lookup, col_lookup, item):
             col_lookup = range(len(self.qc.columns))[col_lookup]
         # This is True when we dealing with assignment of a full column. This case
         # should be handled in a fastpath with `df[col] = item`.
-        if (
-            len(row_lookup) == len(self.qc.index)
-            and len(col_lookup) == 1
-            and hasattr(self.df, "columns")
-        ):
+        if axis == 0:
             self.df[self.df.columns[col_lookup][0]] = item
         # This is True when we are assigning to a full row. We want to reuse the setitem
         # mechanism to operate along only one axis for performance reasons.
-        elif len(col_lookup) == len(self.qc.columns) and len(row_lookup) == 1:
+        elif axis == 1:
             if hasattr(item, "_query_compiler"):
                 item = item._query_compiler
             new_qc = self.qc.setitem(1, self.qc.index[row_lookup[0]], item)
@@ -417,6 +433,57 @@ def _write_items(self, row_lookup, col_lookup, item):
         new_qc = self.qc.write_items(row_lookup, col_lookup, item)
         self.df._create_or_update_from_compiler(new_qc, inplace=True)
 
+    def _determine_setitem_axis(self, row_lookup, col_lookup, row_scaler, col_scaler):
+        """
+        Determine an axis along which we should do an assignment.
+
+        Parameters
+        ----------
+        row_lookup: slice or list
+            Indexer for rows
+        col_lookup: slice or list
+            Indexer for columns
+        row_scaler: bool
+            Whether indexer for rows was scalar or not
+        col_scaler: bool
+            Whether indexer for columns was scalar or not
+
+        Returns
+        -------
+        int or None
+            None if the assignment spans both axes, otherwise the number of the axis to assign along.
+
+        Notes
+        -----
+            axis = 0: column assignment df[col] = item
+            axis = 1: row assignment df.loc[row] = item
+            axis = None: assignment along both axes
+        """
+        if self.df.shape == (1, 1):
+            return None if not (row_scaler ^ col_scaler) else 1 if row_scaler else 0
+
+        def get_axis(axis):
+            return self.qc.index if axis == 0 else self.qc.columns
+
+        row_lookup_len, col_lookup_len = [
+            len(lookup)
+            if not isinstance(lookup, slice)
+            else compute_sliced_len(lookup, len(get_axis(i)))
+            for i, lookup in enumerate([row_lookup, col_lookup])
+        ]
+
+        if (
+            row_lookup_len == len(self.qc.index)
+            and col_lookup_len == 1
+            and isinstance(self.df, DataFrame)
+        ):
+            axis = 0
+        elif col_lookup_len == len(self.qc.columns) and row_lookup_len == 1:
+            axis = 1
+        else:
+            axis = None
+        return axis
+
 
 class _LocIndexer(_LocationIndexerBase):
     """An indexer for modin_df.loc[] functionality."""
@@ -507,7 +574,7 @@ def __setitem__(self, key, item):
         -------
         What this returns (if anything)
         """
-        row_loc, col_loc, _, __, ___ = _parse_tuple(key)
+        row_loc, col_loc, _, row_scaler, col_scaler = _parse_tuple(key)
         if isinstance(row_loc, list) and len(row_loc) == 1:
             if row_loc[0] not in self.qc.index:
                 index = self.qc.index.insert(len(self.qc.index), row_loc[0])
@@ -525,7 +592,14 @@ def __setitem__(self, key, item):
             self.qc = self.df._query_compiler
         else:
             row_lookup, col_lookup = self._compute_lookup(row_loc, col_loc)
-            super(_LocIndexer, self).__setitem__(row_lookup, col_lookup, item)
+            super(_LocIndexer, self).__setitem__(
+                row_lookup,
+                col_lookup,
+                item,
+                axis=self._determine_setitem_axis(
+                    row_lookup, col_lookup, row_scaler, col_scaler
+                ),
+            )
 
     def _compute_enlarge_labels(self, locator, base_index):
         """
@@ -663,12 +737,19 @@ def __setitem__(self, key, item):
         -------
         What this returns (if anything)
         """
-        row_loc, col_loc, _, __, ___ = _parse_tuple(key)
+        row_loc, col_loc, _, row_scaler, col_scaler = _parse_tuple(key)
         self._check_dtypes(row_loc)
         self._check_dtypes(col_loc)
 
         row_lookup, col_lookup = self._compute_lookup(row_loc, col_loc)
-        super(_iLocIndexer, self).__setitem__(row_lookup, col_lookup, item)
+        super(_iLocIndexer, self).__setitem__(
+            row_lookup,
+            col_lookup,
+            item,
+            axis=self._determine_setitem_axis(
+                row_lookup, col_lookup, row_scaler, col_scaler
+            ),
+        )
 
     def _compute_lookup(self, row_loc, col_loc):
         """
diff --git a/modin/pandas/test/dataframe/test_indexing.py b/modin/pandas/test/dataframe/test_indexing.py
@@ -390,9 +390,6 @@ def test_loc_multi_index():
 @pytest.mark.parametrize("index", [["row1", "row2", "row3"], ["row1"]])
 @pytest.mark.parametrize("columns", [["col1", "col2"], ["col1"]])
 def test_loc_assignment(index, columns):
-    if len(index) == 1 and len(columns) == 1:
-        pytest.skip("See Modin issue #2253 for details")
-
     md_df, pd_df = create_test_dfs(index=index, columns=columns)
     for i, ind in enumerate(index):
         for j, col in enumerate(columns):