Skip to content

TST: Add test for where inplace #44255

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Dec 2, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
85 changes: 85 additions & 0 deletions pandas/_testing/_hypothesis.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
"""
Hypothesis data generator helpers.
"""
from datetime import datetime

from hypothesis import strategies as st
from hypothesis.extra.dateutil import timezones as dateutil_timezones
from hypothesis.extra.pytz import timezones as pytz_timezones

from pandas.compat import is_platform_windows

import pandas as pd

from pandas.tseries.offsets import (
BMonthBegin,
BMonthEnd,
BQuarterBegin,
BQuarterEnd,
BYearBegin,
BYearEnd,
MonthBegin,
MonthEnd,
QuarterBegin,
QuarterEnd,
YearBegin,
YearEnd,
)

# Each OPTIONAL_* strategy builds a list of 3 to 10 items in which every item
# is either None or a value of the kind named by the constant.
OPTIONAL_INTS = st.lists(
    st.one_of(st.integers(), st.none()),
    min_size=3,
    max_size=10,
)

OPTIONAL_FLOATS = st.lists(
    st.one_of(st.floats(), st.none()),
    min_size=3,
    max_size=10,
)

OPTIONAL_TEXT = st.lists(
    st.one_of(st.none(), st.text()),
    min_size=3,
    max_size=10,
)

OPTIONAL_DICTS = st.lists(
    st.one_of(
        st.none(),
        # Dictionaries map text keys to integer values.
        st.dictionaries(st.text(), st.integers()),
    ),
    min_size=3,
    max_size=10,
)

OPTIONAL_LISTS = st.lists(
    st.one_of(
        st.none(),
        # Inner lists hold 3 to 10 text items.
        st.lists(st.text(), min_size=3, max_size=10),
    ),
    min_size=3,
    max_size=10,
)

# On Windows the lower bound is raised to 1900-01-01; on every other platform
# the strategy is built with its default bounds.
DATETIME_NO_TZ = (
    st.datetimes(min_value=datetime(1900, 1, 1))
    if is_platform_windows()
    else st.datetimes()
)

# Lower and upper bound are the same instant (1900-01-01); the timezone is
# either absent or drawn from the dateutil / pytz timezone strategies.
_JAN_1_1900 = pd.Timestamp(1900, 1, 1).to_pydatetime()
DATETIME_JAN_1_1900_OPTIONAL_TZ = st.datetimes(
    min_value=_JAN_1_1900,
    max_value=_JAN_1_1900,
    timezones=st.one_of(st.none(), dateutil_timezones(), pytz_timezones()),
)

# Datetimes bounded by pd.Timestamp.min and pd.Timestamp.max; no ``timezones``
# argument is passed.
DATETIME_IN_PD_TIMESTAMP_RANGE_NO_TZ = st.datetimes(
    max_value=pd.Timestamp.max.to_pydatetime(warn=False),
    min_value=pd.Timestamp.min.to_pydatetime(warn=False),
)

# Integers in the closed range [-999, 999].
INT_NEG_999_TO_POS_999 = st.integers(min_value=-999, max_value=999)

# These offset classes carry too little runtime information (e.g. type hints)
# for their construction to be inferred, so the strategy for each one is
# registered in conftest.py and looked up here by type.
YQM_OFFSET = st.one_of(
    *(
        st.from_type(offset_cls)
        for offset_cls in (
            MonthBegin,
            MonthEnd,
            BMonthBegin,
            BMonthEnd,
            QuarterBegin,
            QuarterEnd,
            BQuarterBegin,
            BQuarterEnd,
            YearBegin,
            YearEnd,
            BYearBegin,
            BYearEnd,
        )
    )
)
24 changes: 24 additions & 0 deletions pandas/tests/frame/indexing/test_where.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
from datetime import datetime

from hypothesis import (
given,
strategies as st,
)
import numpy as np
import pytest

Expand All @@ -16,6 +20,13 @@
isna,
)
import pandas._testing as tm
from pandas._testing._hypothesis import (
OPTIONAL_DICTS,
OPTIONAL_FLOATS,
OPTIONAL_INTS,
OPTIONAL_LISTS,
OPTIONAL_TEXT,
)


@pytest.fixture(params=["default", "float_string", "mixed_float", "mixed_int"])
Expand Down Expand Up @@ -797,3 +808,16 @@ def test_where_columns_casting():
result = df.where(pd.notnull(df), None)
# make sure dtypes don't change
tm.assert_frame_equal(expected, result)


@given(
    data=st.one_of(
        OPTIONAL_DICTS, OPTIONAL_FLOATS, OPTIONAL_INTS, OPTIONAL_LISTS, OPTIONAL_TEXT
    )
)
def test_where_inplace_casting(data):
    """In-place ``where`` must match the out-of-place result (GH 22051)."""
    # GH 22051
    df = DataFrame({"a": data})
    not_null = pd.notnull(df)
    # Take the out-of-place result first, while ``df`` is still unmodified.
    expected = df.where(not_null, None).copy()
    df.where(not_null, None, inplace=True)
    tm.assert_equal(df, expected)
15 changes: 3 additions & 12 deletions pandas/tests/io/parser/test_parse_dates.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,18 +14,14 @@
from hypothesis import (
given,
settings,
strategies as st,
)
import numpy as np
import pytest
import pytz

from pandas._libs.tslibs import parsing
from pandas._libs.tslibs.parsing import parse_datetime_string
from pandas.compat import (
is_platform_windows,
np_array_datetime64_compat,
)
from pandas.compat import np_array_datetime64_compat
from pandas.compat.pyarrow import pa_version_under6p0

import pandas as pd
Expand All @@ -38,6 +34,7 @@
Timestamp,
)
import pandas._testing as tm
from pandas._testing._hypothesis import DATETIME_NO_TZ
from pandas.core.indexes.datetimes import date_range

import pandas.io.date_converters as conv
Expand All @@ -52,12 +49,6 @@
# constant
_DEFAULT_DATETIME = datetime(1, 1, 1)

# Strategy for hypothesis
if is_platform_windows():
date_strategy = st.datetimes(min_value=datetime(1900, 1, 1))
else:
date_strategy = st.datetimes()


@xfail_pyarrow
def test_read_csv_with_custom_date_parser(all_parsers):
Expand Down Expand Up @@ -1683,7 +1674,7 @@ def _helper_hypothesis_delimited_date(call, date_string, **kwargs):


@skip_pyarrow
@given(date_strategy)
@given(DATETIME_NO_TZ)
@settings(deadline=None)
@pytest.mark.parametrize("delimiter", list(" -./"))
@pytest.mark.parametrize("dayfirst", [True, False])
Expand Down
76 changes: 5 additions & 71 deletions pandas/tests/tseries/offsets/test_offsets_properties.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,92 +7,26 @@
You may wish to consult the previous version for inspiration on further
tests, or when trying to pin down the bugs exposed by the tests below.
"""
import warnings

from hypothesis import (
assume,
given,
strategies as st,
)
from hypothesis.errors import Flaky
from hypothesis.extra.dateutil import timezones as dateutil_timezones
from hypothesis.extra.pytz import timezones as pytz_timezones
import pytest
import pytz

import pandas as pd
from pandas import Timestamp

from pandas.tseries.offsets import (
BMonthBegin,
BMonthEnd,
BQuarterBegin,
BQuarterEnd,
BYearBegin,
BYearEnd,
MonthBegin,
MonthEnd,
QuarterBegin,
QuarterEnd,
YearBegin,
YearEnd,
)

# ----------------------------------------------------------------
# Helpers for generating random data

with warnings.catch_warnings():
warnings.simplefilter("ignore")
min_dt = Timestamp(1900, 1, 1).to_pydatetime()
max_dt = Timestamp(1900, 1, 1).to_pydatetime()

gen_date_range = st.builds(
pd.date_range,
start=st.datetimes(
# TODO: Choose the min/max values more systematically
min_value=Timestamp(1900, 1, 1).to_pydatetime(),
max_value=Timestamp(2100, 1, 1).to_pydatetime(),
),
periods=st.integers(min_value=2, max_value=100),
freq=st.sampled_from("Y Q M D H T s ms us ns".split()),
tz=st.one_of(st.none(), dateutil_timezones(), pytz_timezones()),
from pandas._testing._hypothesis import (
DATETIME_JAN_1_1900_OPTIONAL_TZ,
YQM_OFFSET,
)

gen_random_datetime = st.datetimes(
min_value=min_dt,
max_value=max_dt,
timezones=st.one_of(st.none(), dateutil_timezones(), pytz_timezones()),
)

# The strategy for each type is registered in conftest.py, as they don't carry
# enough runtime information (e.g. type hints) to infer how to build them.
gen_yqm_offset = st.one_of(
*map(
st.from_type,
[
MonthBegin,
MonthEnd,
BMonthBegin,
BMonthEnd,
QuarterBegin,
QuarterEnd,
BQuarterBegin,
BQuarterEnd,
YearBegin,
YearEnd,
BYearBegin,
BYearEnd,
],
)
)


# ----------------------------------------------------------------
# Offset-specific behaviour tests


@pytest.mark.arm_slow
@given(gen_random_datetime, gen_yqm_offset)
@given(DATETIME_JAN_1_1900_OPTIONAL_TZ, YQM_OFFSET)
def test_on_offset_implementations(dt, offset):
assume(not offset.normalize)
# check that the class-specific implementations of is_on_offset match
Expand All @@ -112,7 +46,7 @@ def test_on_offset_implementations(dt, offset):


@pytest.mark.xfail(strict=False, raises=Flaky, reason="unreliable test timings")
@given(gen_yqm_offset)
@given(YQM_OFFSET)
def test_shift_across_dst(offset):
# GH#18319 check that 1) timezone is correctly normalized and
# 2) that hour is not incorrectly changed by this normalization
Expand Down
6 changes: 3 additions & 3 deletions pandas/tests/tseries/offsets/test_ticks.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
example,
given,
settings,
strategies as st,
)
import numpy as np
import pytest
Expand All @@ -23,6 +22,7 @@
Timestamp,
)
import pandas._testing as tm
from pandas._testing._hypothesis import INT_NEG_999_TO_POS_999
from pandas.tests.tseries.offsets.common import assert_offset_equal

from pandas.tseries import offsets
Expand Down Expand Up @@ -66,7 +66,7 @@ def test_delta_to_tick():
@example(n=2, m=3)
@example(n=800, m=300)
@example(n=1000, m=5)
@given(n=st.integers(-999, 999), m=st.integers(-999, 999))
@given(n=INT_NEG_999_TO_POS_999, m=INT_NEG_999_TO_POS_999)
def test_tick_add_sub(cls, n, m):
# For all Tick subclasses and all integers n, m, we should have
# tick(n) + tick(m) == tick(n+m)
Expand All @@ -86,7 +86,7 @@ def test_tick_add_sub(cls, n, m):
@pytest.mark.parametrize("cls", tick_classes)
@settings(deadline=None)
@example(n=2, m=3)
@given(n=st.integers(-999, 999), m=st.integers(-999, 999))
@given(n=INT_NEG_999_TO_POS_999, m=INT_NEG_999_TO_POS_999)
def test_tick_equality(cls, n, m):
assume(m != n)
# tick == tock iff tick.n == tock.n
Expand Down
14 changes: 3 additions & 11 deletions pandas/tests/tslibs/test_ccalendar.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,13 @@
datetime,
)

from hypothesis import (
given,
strategies as st,
)
from hypothesis import given
import numpy as np
import pytest

from pandas._libs.tslibs import ccalendar

import pandas as pd
from pandas._testing._hypothesis import DATETIME_IN_PD_TIMESTAMP_RANGE_NO_TZ


@pytest.mark.parametrize(
Expand Down Expand Up @@ -59,12 +56,7 @@ def test_dt_correct_iso_8601_year_week_and_day(input_date_tuple, expected_iso_tu
assert result == expected_iso_tuple


@given(
st.datetimes(
min_value=pd.Timestamp.min.to_pydatetime(warn=False),
max_value=pd.Timestamp.max.to_pydatetime(warn=False),
)
)
@given(DATETIME_IN_PD_TIMESTAMP_RANGE_NO_TZ)
def test_isocalendar(dt):
expected = dt.isocalendar()
result = ccalendar.get_iso_calendar(dt.year, dt.month, dt.day)
Expand Down