Skip to content

Commit

Permalink
Merge branch 'main' into cow_replace
Browse files Browse the repository at this point in the history
  • Loading branch information
phofl authored Jan 22, 2023
2 parents 16dbe9a + ce9e3d6 commit 9217d46
Show file tree
Hide file tree
Showing 6 changed files with 90 additions and 33 deletions.
3 changes: 0 additions & 3 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -443,6 +443,3 @@ repos:
types: [python]
files: ^pandas/tests
language: python
exclude: |
(?x)
^pandas/tests/generic/test_generic.py # GH50380
4 changes: 3 additions & 1 deletion pandas/_testing/asserters.py
Original file line number Diff line number Diff line change
Expand Up @@ -1341,7 +1341,9 @@ def assert_indexing_slices_equivalent(ser: Series, l_slc: slice, i_slc: slice) -
assert_series_equal(ser[l_slc], expected)


def assert_metadata_equivalent(left, right) -> None:
def assert_metadata_equivalent(
left: DataFrame | Series, right: DataFrame | Series | None = None
) -> None:
"""
Check that ._metadata attributes are equivalent.
"""
Expand Down
73 changes: 73 additions & 0 deletions pandas/tests/copy_view/test_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
Series,
Timestamp,
date_range,
period_range,
)
import pandas._testing as tm
from pandas.tests.copy_view.util import get_array
Expand Down Expand Up @@ -53,6 +54,78 @@ def test_copy_shallow(using_copy_on_write):
assert np.shares_memory(get_array(df_copy, "a"), get_array(df, "a"))


@pytest.mark.parametrize("copy", [True, None, False])
@pytest.mark.parametrize(
    "method",
    [
        # Each case carries its own id so the callable and its label cannot
        # drift apart.
        # Not covered here (per the original notes): "astype" and
        # "infer_objects" (CoW not yet implemented), "swaplevel" (only series).
        pytest.param(
            lambda df, copy: df.rename(columns=str.lower, copy=copy),
            id="rename",
        ),
        pytest.param(
            lambda df, copy: df.reindex(columns=["a", "c"], copy=copy),
            id="reindex",
        ),
        pytest.param(
            lambda df, copy: df.reindex_like(df, copy=copy),
            id="reindex_like",
        ),
        pytest.param(
            lambda df, copy: df.set_axis(["a", "b", "c"], axis="index", copy=copy),
            id="set_axis",
        ),
        pytest.param(
            lambda df, copy: df.rename_axis(index="test", copy=copy),
            id="rename_axis0",
        ),
        pytest.param(
            lambda df, copy: df.rename_axis(columns="test", copy=copy),
            id="rename_axis1",
        ),
        pytest.param(
            lambda df, copy: df.swapaxes(0, 0, copy=copy),
            id="swapaxes",
        ),
        pytest.param(
            lambda df, copy: df.truncate(0, 5, copy=copy),
            id="truncate",
        ),
        pytest.param(
            lambda df, copy: df.to_timestamp(copy=copy),
            id="to_timestamp",
        ),
        pytest.param(
            lambda df, copy: df.to_period(freq="D", copy=copy),
            id="to_period",
        ),
        pytest.param(
            lambda df, copy: df.tz_localize("US/Central", copy=copy),
            id="tz_localize",
        ),
        pytest.param(
            lambda df, copy: df.tz_convert("US/Central", copy=copy),
            id="tz_convert",
        ),
        pytest.param(
            lambda df, copy: df.set_flags(allows_duplicate_labels=False, copy=copy),
            id="set_flags",
        ),
    ],
)
def test_methods_copy_keyword(
    request, method, copy, using_copy_on_write, using_array_manager
):
    """
    Check how the ``copy`` keyword affects memory sharing for each method.

    The frame is passed through ``method`` with the given ``copy`` value, then
    the arrays ``get_array`` returns for column "a" of the input and of the
    result are compared with ``np.shares_memory``.
    """
    case_id = request.node.callspec.id

    # The time-series methods need a suitable index; every other case keeps
    # the default one. Factories are lazy so only the matching index is
    # built, and the first matching key wins.
    index_factories = (
        ("to_timestamp", lambda: period_range("2012-01-01", freq="D", periods=3)),
        ("to_period", lambda: date_range("2012-01-01", freq="D", periods=3)),
        ("tz_localize", lambda: date_range("2012-01-01", freq="D", periods=3)),
        (
            "tz_convert",
            lambda: date_range(
                "2012-01-01", freq="D", periods=3, tz="Europe/Brussels"
            ),
        ),
    )
    index = next(
        (build() for key, build in index_factories if key in case_id),
        None,
    )

    df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]}, index=index)
    df2 = method(df, copy=copy)

    # An explicit copy=False always shares; otherwise sharing is expected
    # only under CoW when no explicit copy=True was requested.
    share_memory = copy is False or (using_copy_on_write and copy is not True)

    # The trailing "-" keeps "reindex_like" out of this special case.
    if case_id.startswith("reindex-"):
        if using_copy_on_write:
            # TODO copy=True with CoW still returns a view
            share_memory = True
        elif not using_array_manager and copy is False:
            # TODO copy=False without CoW still returns a copy in this case
            share_memory = False

    shares = np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
    assert shares == bool(share_memory)


# -----------------------------------------------------------------------------
# DataFrame methods returning new DataFrame using shallow copy

Expand Down
17 changes: 9 additions & 8 deletions pandas/tests/generic/test_generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def construct(box, shape, value=None, dtype=None, **kwargs):
return box(arr, dtype=dtype, **kwargs)


class Generic:
class TestGeneric:
@pytest.mark.parametrize(
"func",
[
Expand All @@ -66,7 +66,7 @@ def test_rename(self, frame_or_series, func):

for axis in frame_or_series._AXIS_ORDERS:
kwargs = {axis: idx}
obj = construct(4, **kwargs)
obj = construct(frame_or_series, 4, **kwargs)

# rename a single axis
result = obj.rename(**{axis: func})
Expand All @@ -83,21 +83,22 @@ def test_get_numeric_data(self, frame_or_series):
}

# get the numeric data
o = construct(n, **kwargs)
o = construct(frame_or_series, n, **kwargs)
result = o._get_numeric_data()
tm.assert_equal(result, o)

# non-inclusion
result = o._get_bool_data()
expected = construct(n, value="empty", **kwargs)
expected = construct(frame_or_series, n, value="empty", **kwargs)
if isinstance(o, DataFrame):
# preserve columns dtype
expected.columns = o.columns[:0]
tm.assert_equal(result, expected)
# https://github.com/pandas-dev/pandas/issues/50862
tm.assert_equal(result.reset_index(drop=True), expected)

# get the bool data
arr = np.array([True, True, False, True])
o = construct(n, value=arr, **kwargs)
o = construct(frame_or_series, n, value=arr, **kwargs)
result = o._get_numeric_data()
tm.assert_equal(result, o)

Expand Down Expand Up @@ -160,7 +161,7 @@ def f(dtype):

msg = (
"compound dtypes are not implemented "
f"in the {frame_or_series.__name__} frame_or_series"
f"in the {frame_or_series.__name__} constructor"
)

with pytest.raises(NotImplementedError, match=msg):
Expand Down Expand Up @@ -257,7 +258,7 @@ def test_api_compat(self, func, frame_or_series):
# GH 12021
# compat for __name__, __qualname__

obj = (frame_or_series, 5)
obj = construct(frame_or_series, 5)
f = getattr(obj, func)
assert f.__name__ == func
assert f.__qualname__.endswith(func)
Expand Down
2 changes: 0 additions & 2 deletions pandas/tests/io/test_fsspec.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,8 @@ def test_read_csv(cleared_fs, df1):


def test_reasonable_error(monkeypatch, cleared_fs):
from fsspec import registry
from fsspec.registry import known_implementations

registry.target.clear()
with pytest.raises(ValueError, match="nosuchprotocol"):
read_csv("nosuchprotocol://test/test.csv")
err_msg = "test error message"
Expand Down
24 changes: 5 additions & 19 deletions pandas/tests/io/test_gcs.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,17 +22,12 @@
@pytest.fixture
def gcs_buffer(monkeypatch):
"""Emulate GCS using a binary buffer."""
from fsspec import (
AbstractFileSystem,
registry,
)

registry.target.clear() # remove state
import fsspec

gcs_buffer = BytesIO()
gcs_buffer.close = lambda: True

class MockGCSFileSystem(AbstractFileSystem):
class MockGCSFileSystem(fsspec.AbstractFileSystem):
@staticmethod
def open(*args, **kwargs):
gcs_buffer.seek(0)
Expand All @@ -42,7 +37,8 @@ def ls(self, path, **kwargs):
# needed for pyarrow
return [{"name": path, "type": "file"}]

monkeypatch.setattr("gcsfs.GCSFileSystem", MockGCSFileSystem)
# Overwrites the default implementation from gcsfs to our mock class
fsspec.register_implementation("gs", MockGCSFileSystem, clobber=True)

return gcs_buffer

Expand All @@ -55,9 +51,6 @@ def test_to_read_gcs(gcs_buffer, format):
GH 33987
"""
from fsspec import registry

registry.target.clear() # remove state

df1 = DataFrame(
{
Expand Down Expand Up @@ -132,9 +125,6 @@ def test_to_csv_compression_encoding_gcs(gcs_buffer, compression_only, encoding)
GH 35677 (to_csv, compression), GH 26124 (to_csv, encoding), and
GH 32392 (read_csv, encoding)
"""
from fsspec import registry

registry.target.clear() # remove state
df = tm.makeDataFrame()

# reference of compressed and encoded file
Expand Down Expand Up @@ -174,12 +164,8 @@ def test_to_csv_compression_encoding_gcs(gcs_buffer, compression_only, encoding)
@td.skip_if_no("gcsfs")
def test_to_parquet_gcs_new_file(monkeypatch, tmpdir):
"""Regression test for writing to a not-yet-existent GCS Parquet file."""
from fsspec import (
AbstractFileSystem,
registry,
)
from fsspec import AbstractFileSystem

registry.target.clear() # remove state
df1 = DataFrame(
{
"int": [1, 3],
Expand Down

0 comments on commit 9217d46

Please sign in to comment.