Merge branch 'main' into cow_replace

phofl · web-flow · commit 9217d461c526 · 2023-01-22T01:02:58.000Z
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -443,6 +443,3 @@ repos:
         types: [python]
         files: ^pandas/tests
         language: python
-        exclude: |
-            (?x)
-            ^pandas/tests/generic/test_generic.py  # GH50380
diff --git a/pandas/_testing/asserters.py b/pandas/_testing/asserters.py
@@ -1341,7 +1341,9 @@ def assert_indexing_slices_equivalent(ser: Series, l_slc: slice, i_slc: slice) -
         assert_series_equal(ser[l_slc], expected)
 
 
-def assert_metadata_equivalent(left, right) -> None:
+def assert_metadata_equivalent(
+    left: DataFrame | Series, right: DataFrame | Series | None = None
+) -> None:
     """
     Check that ._metadata attributes are equivalent.
     """
diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py
@@ -9,6 +9,7 @@
     Series,
     Timestamp,
     date_range,
+    period_range,
 )
 import pandas._testing as tm
 from pandas.tests.copy_view.util import get_array
@@ -53,6 +54,78 @@ def test_copy_shallow(using_copy_on_write):
         assert np.shares_memory(get_array(df_copy, "a"), get_array(df, "a"))
 
 
+@pytest.mark.parametrize("copy", [True, None, False])
+@pytest.mark.parametrize(
+    "method",
+    [
+        lambda df, copy: df.rename(columns=str.lower, copy=copy),
+        lambda df, copy: df.reindex(columns=["a", "c"], copy=copy),
+        lambda df, copy: df.reindex_like(df, copy=copy),
+        lambda df, copy: df.set_axis(["a", "b", "c"], axis="index", copy=copy),
+        lambda df, copy: df.rename_axis(index="test", copy=copy),
+        lambda df, copy: df.rename_axis(columns="test", copy=copy),
+        # lambda df, copy: df.astype({'b': 'int64'}, copy=copy),
+        # lambda df, copy: df.swaplevel(0, 0, copy=copy),
+        lambda df, copy: df.swapaxes(0, 0, copy=copy),
+        lambda df, copy: df.truncate(0, 5, copy=copy),
+        # lambda df, copy: df.infer_objects(copy=copy)
+        lambda df, copy: df.to_timestamp(copy=copy),
+        lambda df, copy: df.to_period(freq="D", copy=copy),
+        lambda df, copy: df.tz_localize("US/Central", copy=copy),
+        lambda df, copy: df.tz_convert("US/Central", copy=copy),
+        lambda df, copy: df.set_flags(allows_duplicate_labels=False, copy=copy),
+    ],
+    ids=[
+        "rename",
+        "reindex",
+        "reindex_like",
+        "set_axis",
+        "rename_axis0",
+        "rename_axis1",
+        # "astype",  # CoW not yet implemented
+        # "swaplevel",  # only series
+        "swapaxes",
+        "truncate",
+        # "infer_objects",  # CoW not yet implemented
+        "to_timestamp",
+        "to_period",
+        "tz_localize",
+        "tz_convert",
+        "set_flags",
+    ],
+)
+def test_methods_copy_keyword(
+    request, method, copy, using_copy_on_write, using_array_manager
+):
+    index = None  # replaced below for methods that need a period/datetime index
+    if "to_timestamp" in request.node.callspec.id:
+        index = period_range("2012-01-01", freq="D", periods=3)
+    elif "to_period" in request.node.callspec.id:
+        index = date_range("2012-01-01", freq="D", periods=3)
+    elif "tz_localize" in request.node.callspec.id:
+        index = date_range("2012-01-01", freq="D", periods=3)
+    elif "tz_convert" in request.node.callspec.id:
+        index = date_range("2012-01-01", freq="D", periods=3, tz="Europe/Brussels")
+
+    df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]}, index=index)
+    df2 = method(df, copy=copy)
+
+    share_memory = (using_copy_on_write and copy is not True) or copy is False
+
+    if request.node.callspec.id.startswith("reindex-"):  # "-" excludes "reindex_like"
+        # TODO: reindex with copy=False still copies without CoW / array manager
+        if not using_copy_on_write and not using_array_manager and copy is False:
+            share_memory = False
+        # TODO: under CoW, reindex still returns a view even with copy=True
+        if using_copy_on_write:
+            share_memory = True
+
+    if share_memory:
+        assert np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
+    else:
+        assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
+
+
 # -----------------------------------------------------------------------------
 # DataFrame methods returning new DataFrame using shallow copy
 
diff --git a/pandas/tests/generic/test_generic.py b/pandas/tests/generic/test_generic.py
@@ -50,7 +50,7 @@ def construct(box, shape, value=None, dtype=None, **kwargs):
     return box(arr, dtype=dtype, **kwargs)
 
 
-class Generic:
+class TestGeneric:
     @pytest.mark.parametrize(
         "func",
         [
@@ -66,7 +66,7 @@ def test_rename(self, frame_or_series, func):
 
         for axis in frame_or_series._AXIS_ORDERS:
             kwargs = {axis: idx}
-            obj = construct(4, **kwargs)
+            obj = construct(frame_or_series, 4, **kwargs)
 
             # rename a single axis
             result = obj.rename(**{axis: func})
@@ -83,21 +83,22 @@ def test_get_numeric_data(self, frame_or_series):
         }
 
         # get the numeric data
-        o = construct(n, **kwargs)
+        o = construct(frame_or_series, n, **kwargs)
         result = o._get_numeric_data()
         tm.assert_equal(result, o)
 
         # non-inclusion
         result = o._get_bool_data()
-        expected = construct(n, value="empty", **kwargs)
+        expected = construct(frame_or_series, n, value="empty", **kwargs)
         if isinstance(o, DataFrame):
             # preserve columns dtype
             expected.columns = o.columns[:0]
-        tm.assert_equal(result, expected)
+        # https://github.com/pandas-dev/pandas/issues/50862
+        tm.assert_equal(result.reset_index(drop=True), expected)
 
         # get the bool data
         arr = np.array([True, True, False, True])
-        o = construct(n, value=arr, **kwargs)
+        o = construct(frame_or_series, n, value=arr, **kwargs)
         result = o._get_numeric_data()
         tm.assert_equal(result, o)
 
@@ -160,7 +161,7 @@ def f(dtype):
 
         msg = (
             "compound dtypes are not implemented "
-            f"in the {frame_or_series.__name__} frame_or_series"
+            f"in the {frame_or_series.__name__} constructor"
         )
 
         with pytest.raises(NotImplementedError, match=msg):
@@ -257,7 +258,7 @@ def test_api_compat(self, func, frame_or_series):
         # GH 12021
         # compat for __name__, __qualname__
 
-        obj = (frame_or_series, 5)
+        obj = construct(frame_or_series, 5)
         f = getattr(obj, func)
         assert f.__name__ == func
         assert f.__qualname__.endswith(func)
diff --git a/pandas/tests/io/test_fsspec.py b/pandas/tests/io/test_fsspec.py
@@ -50,10 +50,8 @@ def test_read_csv(cleared_fs, df1):
 
 
 def test_reasonable_error(monkeypatch, cleared_fs):
-    from fsspec import registry
     from fsspec.registry import known_implementations
 
-    registry.target.clear()
     with pytest.raises(ValueError, match="nosuchprotocol"):
         read_csv("nosuchprotocol://test/test.csv")
     err_msg = "test error message"
diff --git a/pandas/tests/io/test_gcs.py b/pandas/tests/io/test_gcs.py
@@ -22,17 +22,12 @@
 @pytest.fixture
 def gcs_buffer(monkeypatch):
     """Emulate GCS using a binary buffer."""
-    from fsspec import (
-        AbstractFileSystem,
-        registry,
-    )
-
-    registry.target.clear()  # remove state
+    import fsspec
 
     gcs_buffer = BytesIO()
     gcs_buffer.close = lambda: True
 
-    class MockGCSFileSystem(AbstractFileSystem):
+    class MockGCSFileSystem(fsspec.AbstractFileSystem):
         @staticmethod
         def open(*args, **kwargs):
             gcs_buffer.seek(0)
@@ -42,7 +37,8 @@ def ls(self, path, **kwargs):
             # needed for pyarrow
             return [{"name": path, "type": "file"}]
 
-    monkeypatch.setattr("gcsfs.GCSFileSystem", MockGCSFileSystem)
+    # Replace the default "gs" implementation from gcsfs with our mock class
+    fsspec.register_implementation("gs", MockGCSFileSystem, clobber=True)
 
     return gcs_buffer
 
@@ -55,9 +51,6 @@ def test_to_read_gcs(gcs_buffer, format):
 
     GH 33987
     """
-    from fsspec import registry
-
-    registry.target.clear()  # remove state
 
     df1 = DataFrame(
         {
@@ -132,9 +125,6 @@ def test_to_csv_compression_encoding_gcs(gcs_buffer, compression_only, encoding)
     GH 35677 (to_csv, compression), GH 26124 (to_csv, encoding), and
     GH 32392 (read_csv, encoding)
     """
-    from fsspec import registry
-
-    registry.target.clear()  # remove state
     df = tm.makeDataFrame()
 
     # reference of compressed and encoded file
@@ -174,12 +164,8 @@ def test_to_csv_compression_encoding_gcs(gcs_buffer, compression_only, encoding)
 @td.skip_if_no("gcsfs")
 def test_to_parquet_gcs_new_file(monkeypatch, tmpdir):
     """Regression test for writing to a not-yet-existent GCS Parquet file."""
-    from fsspec import (
-        AbstractFileSystem,
-        registry,
-    )
+    from fsspec import AbstractFileSystem
 
-    registry.target.clear()  # remove state
     df1 = DataFrame(
         {
             "int": [1, 3],