
PDEP0004: implementation #49024


Merged

54 commits merged into main from implementation-pdep-4 on Dec 13, 2022

Changes from 1 commit

Commits (54)
ea79669
:wastebasket: deprecate infer_datetime_format, make strict
Oct 18, 2022
bb68cc3
:rotating_light: add warning about dayfirst
Oct 18, 2022
82266f4
:white_check_mark: add/update tests
Oct 18, 2022
4a6f198
:rotating_light: add warning if format cant be guessed
Oct 18, 2022
5568dca
:goal_net: catch warnings
Oct 18, 2022
bc910b0
:memo: update docs
Oct 18, 2022
7d03503
:memo: add example of reading csv file with mixed formats
Oct 19, 2022
ac825f5
:wastebasket: removed now outdated tests / clean inputs
Oct 19, 2022
2ffcef6
:memo: clarify whatsnew and user-guide
Oct 21, 2022
060835d
Merge remote-tracking branch 'upstream/main' into implementation-pdep-4
Oct 21, 2022
1d9f274
Merge branch 'main' into implementation-pdep-4
MarcoGorelli Oct 28, 2022
b3e32ac
:art:
Oct 28, 2022
22417cf
Merge remote-tracking branch 'upstream/main' into implementation-pdep-4
Oct 29, 2022
d3adfe5
guess %Y-%m format
Oct 29, 2022
affa7f3
Detect format from first non-na, but also exclude now and today
Oct 29, 2022
575b215
:white_check_mark: fixup tests based on now and today parsing
Oct 29, 2022
f0e83da
Merge remote-tracking branch 'upstream/main' into implementation-pdep-4
Oct 29, 2022
a5ff448
Merge remote-tracking branch 'upstream/main' into implementation-pdep-4
Nov 12, 2022
68a6ea2
Merge remote-tracking branch 'upstream/main' into implementation-pdep-4
Nov 15, 2022
6661ae3
Merge remote-tracking branch 'upstream/main' into implementation-pdep-4
Nov 17, 2022
1d255e0
fixup after merge
Nov 17, 2022
b3aa585
Merge remote-tracking branch 'upstream/main' into implementation-pdep-4
Nov 17, 2022
285b1ff
fixup after merge
Nov 17, 2022
963b62b
fixup test
Nov 17, 2022
c90a8a5
remove outdated doctest
Nov 17, 2022
3c033ff
Merge remote-tracking branch 'upstream/main' into implementation-pdep-4
Nov 19, 2022
cdfa355
xfail test based on issue 49767
Nov 19, 2022
434c6f0
Merge remote-tracking branch 'upstream/main' into implementation-pdep-4
Dec 2, 2022
5755032
wip
Dec 2, 2022
96c0653
Merge remote-tracking branch 'upstream/main' into implementation-pdep-4
Dec 3, 2022
9f1c18e
Merge remote-tracking branch 'upstream/main' into implementation-pdep-4
Dec 3, 2022
0a86705
add back examples of formats which can be guessed
Dec 3, 2022
7b4d6be
Merge remote-tracking branch 'upstream/main' into implementation-pdep-4
Dec 6, 2022
86e9bcf
start fixing up
Dec 6, 2022
f92a8cb
fixups from reviews
Dec 6, 2022
fd215df
lint
Dec 6, 2022
0a5c466
put tests back
Dec 6, 2022
772dd6c
shorten diff
Dec 6, 2022
b49b7cf
add example of string which cannot be guessed
Dec 6, 2022
17f5e74
Merge remote-tracking branch 'upstream/main' into implementation-pdep-4
Dec 6, 2022
d17d819
add deprecated directive, construct expected explicitly, explicit Use…
Dec 6, 2022
f4520e9
remove redundant example
Dec 6, 2022
fcb515f
restore newline
Dec 6, 2022
78b4b9e
Merge remote-tracking branch 'upstream/main' into implementation-pdep-4
Dec 9, 2022
2215652
double backticks around False, explicitly raise UserWarning
Dec 9, 2022
1ec70db
Merge branch 'main' into implementation-pdep-4
MarcoGorelli Dec 10, 2022
7b0eb99
Merge remote-tracking branch 'upstream/main' into implementation-pdep-4
Dec 12, 2022
7d11f59
reword warning
Dec 12, 2022
30e6f39
Merge remote-tracking branch 'upstream/main' into implementation-pdep-4
Dec 12, 2022
f0ac458
test both dayfirst True and False
Dec 12, 2022
92ef7e2
Merge remote-tracking branch 'upstream/main' into implementation-pdep-4
Dec 13, 2022
4a5dd1c
postmerge fixup
Dec 13, 2022
917b31b
unimportant typo to restart CI
Dec 13, 2022
135bbb5
Merge branch 'main' into implementation-pdep-4
MarcoGorelli Dec 13, 2022
✅ add/update tests
MarcoGorelli committed Oct 20, 2022
commit 82266f45d2014cb85d1ff08e53e1de4dcdac32a7
3 changes: 2 additions & 1 deletion pandas/tests/apply/test_frame_apply.py
@@ -836,7 +836,8 @@ def test_with_dictlike_columns_with_datetime():
df["author"] = ["X", "Y", "Z"]
df["publisher"] = ["BBC", "NBC", "N24"]
df["date"] = pd.to_datetime(
["17-10-2010 07:15:30", "13-05-2011 08:20:35", "15-01-2013 09:09:09"]
["17-10-2010 07:15:30", "13-05-2011 08:20:35", "15-01-2013 09:09:09"],
dayfirst=True,
)
result = df.apply(lambda x: {}, axis=1)
expected = Series([{}, {}, {}])
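For context, a minimal sketch of the behaviour this change accommodates (assuming a pandas build with the stricter PDEP-4 parsing, where to_datetime guesses one format from the first element). Without dayfirst=True, day-first strings like these can trigger the new dayfirst warning, so the test now passes it explicitly:

import pandas as pd

# Sketch, not the test itself: with dayfirst=True the guessed format is
# day-first (%d-%m-%Y %H:%M:%S), so all three strings parse consistently.
dates = pd.to_datetime(
    ["17-10-2010 07:15:30", "13-05-2011 08:20:35", "15-01-2013 09:09:09"],
    dayfirst=True,
)
print(dates)  # 2010-10-17 07:15:30, 2011-05-13 08:20:35, 2013-01-15 09:09:09
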
10 changes: 5 additions & 5 deletions pandas/tests/frame/methods/test_drop.py
@@ -405,11 +405,11 @@ def test_drop_level_nonunique_datetime(self):
idx = Index([2, 3, 4, 4, 5], name="id")
idxdt = pd.to_datetime(
[
"201603231400",
"201603231500",
"201603231600",
"201603231600",
"201603231700",
"2016-03-23 14:00",
"2016-03-23 15:00",
"2016-03-23 16:00",
"2016-03-23 16:00",
"2016-03-23 17:00",
]
)
df = DataFrame(np.arange(10).reshape(5, 2), columns=list("ab"), index=idx)
10 changes: 5 additions & 5 deletions pandas/tests/frame/methods/test_to_csv.py
@@ -27,7 +27,7 @@

class TestDataFrameToCSV:
def read_csv(self, path, **kwargs):
params = {"index_col": 0, "parse_dates": True}
params = {"index_col": 0}
params.update(**kwargs)

return read_csv(path, **params)
@@ -46,17 +46,17 @@ def test_to_csv_from_csv1(self, float_frame, datetime_frame):
# freq does not roundtrip
datetime_frame.index = datetime_frame.index._with_freq(None)
datetime_frame.to_csv(path)
recons = self.read_csv(path)
recons = self.read_csv(path, parse_dates=True)
tm.assert_frame_equal(datetime_frame, recons)

datetime_frame.to_csv(path, index_label="index")
recons = self.read_csv(path, index_col=None)
recons = self.read_csv(path, index_col=None, parse_dates=True)

assert len(recons.columns) == len(datetime_frame.columns) + 1

# no index
datetime_frame.to_csv(path, index=False)
recons = self.read_csv(path, index_col=None)
recons = self.read_csv(path, index_col=None, parse_dates=True)
tm.assert_almost_equal(datetime_frame.values, recons.values)

# corner case
@@ -1056,7 +1056,7 @@ def test_to_csv_date_format(self, datetime_frame):

# test NaTs
nat_index = to_datetime(
["NaT"] * 10 + ["2000-01-01", "1/1/2000", "1-1-2000"]
["NaT"] * 10 + ["2000-01-01", "2000-01-01", "2000-01-01"]
)
nat_frame = DataFrame({"A": nat_index}, index=nat_index)
nat_frame.to_csv(path, date_format="%Y-%m-%d")
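The mixed date strings are collapsed to a single format because, under the stricter parsing, to_datetime guesses one format from the first non-NaT element and applies it to the whole array. A rough sketch of that behaviour (assuming the PDEP-4 pandas):

import pandas as pd

# Sketch: the format is guessed from the first non-NaT value
# ("2000-01-01" -> "%Y-%m-%d") and applied to every element, so mixing
# "1/1/2000" or "1-1-2000" into the same call would no longer fit the
# single guessed format.
nat_index = pd.to_datetime(["NaT"] * 10 + ["2000-01-01", "2000-01-01", "2000-01-01"])
print(nat_index.dtype)  # datetime64[ns]
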
12 changes: 10 additions & 2 deletions pandas/tests/indexes/datetimes/test_constructors.py
@@ -1042,10 +1042,18 @@ def test_datetimeindex_constructor_misc(self):
arr = np.array(["1/1/2005", "1/2/2005", "1/3/2005", "2005-01-04"], dtype="O")
idx4 = DatetimeIndex(arr)

arr = to_datetime(["1/1/2005", "1/2/2005", "1/3/2005", "2005-01-04"])
# Can't be parsed consistently, need to parse each element individually
arr = [
to_datetime(date_string)
for date_string in ["1/1/2005", "1/2/2005", "1/3/2005", "2005-01-04"]
]
idx5 = DatetimeIndex(arr)

arr = to_datetime(["1/1/2005", "1/2/2005", "Jan 3, 2005", "2005-01-04"])
# Can't be parsed consistently, need to parse each element individually
arr = [
to_datetime(date_string)
for date_string in ["1/1/2005", "1/2/2005", "Jan 3, 2005", "2005-01-04"]
]
idx6 = DatetimeIndex(arr)

idx7 = DatetimeIndex(["12/05/2007", "25/01/2008"], dayfirst=True)
12 changes: 9 additions & 3 deletions pandas/tests/indexes/test_base.py
@@ -1185,10 +1185,16 @@ def test_equals_op_index_vs_mi_same_length(self):
expected = np.array([False, False, False])
tm.assert_numpy_array_equal(result, expected)

@pytest.mark.parametrize("dt_conv", [pd.to_datetime, pd.to_timedelta])
def test_dt_conversion_preserves_name(self, dt_conv):
@pytest.mark.parametrize(
"dt_conv, arg",
[
(pd.to_datetime, ["2000-01-01", "2000-01-02"]),
(pd.to_timedelta, ["01:02:03", "01:02:04"]),
],
)
def test_dt_conversion_preserves_name(self, dt_conv, arg):
# GH 10875
index = Index(["01:02:03", "01:02:04"], name="label")
index = Index(arg, name="label")
assert index.name == dt_conv(index).name

def test_cached_properties_not_settable(self):
4 changes: 2 additions & 2 deletions pandas/tests/io/parser/common/test_common_basic.py
@@ -58,8 +58,8 @@ def _set_noconvert_columns(self):
return CParserWrapper._set_noconvert_columns(self)

data = """a,b,c,d,e
0,1,20140101,0900,4
0,1,20140102,1000,4"""
0,1,2014-01-01,09:00,4
0,1,2014-01-02,10:00,4"""

parse_dates = [[1, 2]]
cols = {
101 changes: 29 additions & 72 deletions pandas/tests/io/parser/test_parse_dates.py
@@ -1666,9 +1666,9 @@ def test_parse_delimited_date_swap_no_warning(
@pytest.mark.parametrize(
"date_string,dayfirst,expected",
[
# %d/%m/%Y; month > 12 thus replacement
# %d/%m/%Y; month > 12
("13/02/2019", False, datetime(2019, 2, 13)),
# %m/%d/%Y; day > 12 thus there will be no replacement
# %m/%d/%Y; day > 12
("02/13/2019", True, datetime(2019, 2, 13)),
],
)
@@ -1677,7 +1677,10 @@ def test_parse_delimited_date_swap_with_warning(
):
parser = all_parsers
expected = DataFrame({0: [expected]}, dtype="datetime64[ns]")
warning_msg = "Specify a format to ensure consistent parsing"
warning_msg = (
"Parsing dates in .* format when dayfirst=.* was specified. "
"Pass `dayfirst=.*` or specify a format to silence this warning."
)
result = parser.read_csv_check_warnings(
UserWarning,
warning_msg,
@@ -1691,13 +1694,11 @@ def test_parse_multiple_delimited_dates_with_swap_warnings():

def test_parse_multiple_delimited_dates_with_swap_warnings():
# GH46210
warning_msg = "Specify a format to ensure consistent parsing"
with tm.assert_produces_warning(UserWarning, match=warning_msg) as record:
with pytest.raises(
ValueError,
match=r"^time data '31/05/2000' does not match format '%m/%d/%Y' \(match\)$",
):
pd.to_datetime(["01/01/2000", "31/05/2000", "31/05/2001", "01/02/2000"])
assert len({str(warning.message) for warning in record}) == 1
# Using set(record) as repetitions of the same warning are suppressed
# https://docs.python.org/3/library/warnings.html
# and here we care to check that the warning is only shows once to users.


def _helper_hypothesis_delimited_date(call, date_string, **kwargs):
@@ -1860,97 +1861,51 @@ def test_parse_dates_and_keep_orgin_column(all_parsers):

def test_dayfirst_warnings():
# GH 12585
warning_msg_day_first = (
r"Parsing dates in DD/MM/YYYY format when dayfirst=False \(the default\) was "
r"specified. This may lead to inconsistently parsed dates! Specify a format "
r"to ensure consistent parsing."
)
warning_msg_month_first = (
"Parsing dates in MM/DD/YYYY format when dayfirst=True was "
"specified. This may lead to inconsistently parsed dates! Specify a format "
"to ensure consistent parsing."
)

# CASE 1: valid input
input = "date\n31/12/2014\n10/03/2011"
expected_consistent = DatetimeIndex(
expected = DatetimeIndex(
["2014-12-31", "2011-03-10"], dtype="datetime64[ns]", freq=None, name="date"
)
expected_inconsistent = DatetimeIndex(
["2014-12-31", "2011-10-03"], dtype="datetime64[ns]", freq=None, name="date"
warning_msg = (
"Parsing dates in .* format when dayfirst=.* was specified. "
"Pass `dayfirst=.*` or specify a format to silence this warning."
)

# A. dayfirst arg correct, no warning
res1 = read_csv(
StringIO(input), parse_dates=["date"], dayfirst=True, index_col="date"
).index
tm.assert_index_equal(expected_consistent, res1)
tm.assert_index_equal(expected, res1)

# B. dayfirst arg incorrect, warning + incorrect output
with tm.assert_produces_warning(UserWarning, match=warning_msg_day_first):
# B. dayfirst arg incorrect, warning
with tm.assert_produces_warning(UserWarning, match=warning_msg):
res2 = read_csv(
StringIO(input), parse_dates=["date"], dayfirst=False, index_col="date"
).index
tm.assert_index_equal(expected_inconsistent, res2)

# C. dayfirst default arg, same as B
with tm.assert_produces_warning(UserWarning, match=warning_msg_day_first):
res3 = read_csv(
StringIO(input), parse_dates=["date"], dayfirst=False, index_col="date"
).index
tm.assert_index_equal(expected_inconsistent, res3)

# D. infer_datetime_format=True overrides dayfirst default
# no warning + correct result
res4 = read_csv(
StringIO(input),
parse_dates=["date"],
infer_datetime_format=True,
index_col="date",
).index
tm.assert_index_equal(expected_consistent, res4)
tm.assert_index_equal(expected, res2)

# CASE 2: invalid input
# cannot consistently process with single format
# warnings *always* raised
# return to user unaltered

# first in DD/MM/YYYY, second in MM/DD/YYYY
input = "date\n31/12/2014\n03/30/2011"
expected = DatetimeIndex(
["2014-12-31", "2011-03-30"], dtype="datetime64[ns]", freq=None, name="date"
)
expected = Index(["31/12/2014", "03/30/2011"], dtype="object", name="date")

# A. use dayfirst=True
with tm.assert_produces_warning(UserWarning, match=warning_msg_month_first):
res5 = read_csv(
StringIO(input), parse_dates=["date"], dayfirst=True, index_col="date"
).index
res5 = read_csv(
StringIO(input), parse_dates=["date"], dayfirst=True, index_col="date"
).index
tm.assert_index_equal(expected, res5)

# B. use dayfirst=False
with tm.assert_produces_warning(UserWarning, match=warning_msg_day_first):
with tm.assert_produces_warning(UserWarning, match=warning_msg):
res6 = read_csv(
StringIO(input), parse_dates=["date"], dayfirst=False, index_col="date"
).index
tm.assert_index_equal(expected, res6)

# C. use dayfirst default arg, same as B
with tm.assert_produces_warning(UserWarning, match=warning_msg_day_first):
res7 = read_csv(
StringIO(input), parse_dates=["date"], dayfirst=False, index_col="date"
).index
tm.assert_index_equal(expected, res7)

# D. use infer_datetime_format=True
with tm.assert_produces_warning(UserWarning, match=warning_msg_day_first):
res8 = read_csv(
StringIO(input),
parse_dates=["date"],
infer_datetime_format=True,
index_col="date",
).index
tm.assert_index_equal(expected, res8)


@pytest.mark.parametrize(
"date_string, dayfirst",
Expand All @@ -1973,9 +1928,11 @@ def test_dayfirst_warnings_no_leading_zero(date_string, dayfirst):
expected = DatetimeIndex(
["2014-01-31"], dtype="datetime64[ns]", freq=None, name="date"
)
with tm.assert_produces_warning(
UserWarning, match=r"may lead to inconsistently parsed dates"
):
warning_msg = (
"Parsing dates in .* format when dayfirst=.* was specified. "
"Pass `dayfirst=.*` or specify a format to silence this warning."
)
with tm.assert_produces_warning(UserWarning, match=warning_msg):
res = read_csv(
StringIO(initial_value),
parse_dates=["date"],
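The reworded warning and the stricter error that these tests now expect are easiest to see directly in to_datetime. A small sketch (assuming the PDEP-4 behaviour):

import pandas as pd

# Day-first input parsed with the matching dayfirst flag: no warning.
pd.to_datetime(["31/12/2014", "10/03/2011"], dayfirst=True)

# With the default dayfirst=False the same input is expected to warn along
# the lines of "Parsing dates in %d/%m/%Y format when dayfirst=False was
# specified. Pass `dayfirst=True` or specify a format to silence this warning."

# Mixed formats: %m/%d/%Y is guessed from "01/01/2000", "31/05/2000" does not
# match it, and a ValueError is raised instead of the old silent format swap.
try:
    pd.to_datetime(["01/01/2000", "31/05/2000", "31/05/2001", "01/02/2000"])
except ValueError as err:
    print(err)  # time data '31/05/2000' does not match format '%m/%d/%Y'
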
8 changes: 4 additions & 4 deletions pandas/tests/io/parser/usecols/test_parse_dates.py
@@ -31,8 +31,8 @@
def test_usecols_with_parse_dates(all_parsers, usecols):
# see gh-9755
data = """a,b,c,d,e
0,1,20140101,0900,4
0,1,20140102,1000,4"""
0,1,2014-01-01,09:00,4
0,1,2014-01-02,10:00,4"""
parser = all_parsers
parse_dates = [[1, 2]]

@@ -138,8 +138,8 @@ def test_usecols_with_parse_dates4(all_parsers):
)
def test_usecols_with_parse_dates_and_names(all_parsers, usecols, names):
# see gh-9755
s = """0,1,20140101,0900,4
0,1,20140102,1000,4"""
s = """0,1,2014-01-01,09:00,4
0,1,2014-01-02,10:00,4"""
parse_dates = [[1, 2]]
parser = all_parsers

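The CSV fixtures switch from compact digit runs to ISO-style fields so that the combined date/time string still has a guessable format. A hedged sketch (assuming the pandas version this PR targets, where parse_dates accepts a list of column groups to combine):

from io import StringIO

import pandas as pd

# Sketch: parse_dates=[[2, 3]] joins columns 2 and 3 with a space before
# parsing.  "2014-01-01 09:00" has a guessable format; the old
# "20140101 0900" concatenation presumably did not, hence the fixture change.
data = "a,b,c,d,e\n0,1,2014-01-01,09:00,4\n0,1,2014-01-02,10:00,4"
df = pd.read_csv(StringIO(data), parse_dates=[[2, 3]])
print(df.dtypes)  # the combined "c_d" column comes back as datetime64[ns]
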
4 changes: 2 additions & 2 deletions pandas/tests/io/test_sql.py
@@ -1386,7 +1386,7 @@ def test_sqlalchemy_type_mapping(self):

# Test Timestamp objects (no datetime64 because of timezone) (GH9085)
df = DataFrame(
{"time": to_datetime(["201412120154", "201412110254"], utc=True)}
{"time": to_datetime(["2014-12-12 01:54", "2014-12-11 02:54"], utc=True)}
)
db = sql.SQLDatabase(self.conn)
table = sql.SQLTable("test_type", db, frame=df)
@@ -1595,7 +1595,7 @@ def test_sqlite_type_mapping(self):

# Test Timestamp objects (no datetime64 because of timezone) (GH9085)
df = DataFrame(
{"time": to_datetime(["201412120154", "201412110254"], utc=True)}
{"time": to_datetime(["2014-12-12 01:54", "2014-12-11 02:54"], utc=True)}
)
db = sql.SQLiteDatabase(self.conn)
table = sql.SQLiteTable("test_type", db, frame=df)
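Same idea for the SQL round-trip fixtures: compact stamps such as "201412120154" are replaced with ISO-like strings whose format the stricter parser can guess. A minimal sketch (assuming the PDEP-4 behaviour):

import pandas as pd

# Sketch: ISO-like strings have an easily guessed format, and utc=True gives
# the tz-aware values the SQL type-mapping tests need.
stamps = pd.to_datetime(["2014-12-12 01:54", "2014-12-11 02:54"], utc=True)
print(stamps.dtype)  # datetime64[ns, UTC]
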
2 changes: 1 addition & 1 deletion pandas/tests/io/xml/test_xml_dtypes.py
@@ -457,7 +457,7 @@ def test_day_first_parse_dates(parser):
)

with tm.assert_produces_warning(
UserWarning, match="Parsing dates in DD/MM/YYYY format"
UserWarning, match="Parsing dates in %d/%m/%Y format"
):
df_result = read_xml(xml, parse_dates=["date"], parser=parser)
df_iter = read_xml_iterparse(
4 changes: 2 additions & 2 deletions pandas/tests/plotting/test_converter.py
@@ -161,8 +161,8 @@ def dtc(self):
return converter.DatetimeConverter()

def test_convert_accepts_unicode(self, dtc):
r1 = dtc.convert("12:22", None, None)
r2 = dtc.convert("12:22", None, None)
r1 = dtc.convert("2000-01-01 12:22", None, None)
r2 = dtc.convert("2000-01-01 12:22", None, None)
assert r1 == r2, "DatetimeConverter.convert should accept unicode"

def test_conversion(self, dtc):
6 changes: 3 additions & 3 deletions pandas/tests/series/methods/test_to_csv.py
@@ -13,7 +13,7 @@

class TestSeriesToCSV:
def read_csv(self, path, **kwargs):
params = {"index_col": 0, "header": None, "parse_dates": True}
params = {"index_col": 0, "header": None}
params.update(**kwargs)

header = params.get("header")
@@ -30,7 +30,7 @@ def test_from_csv(self, datetime_series, string_series):

with tm.ensure_clean() as path:
datetime_series.to_csv(path, header=False)
ts = self.read_csv(path)
ts = self.read_csv(path, parse_dates=True)
tm.assert_series_equal(datetime_series, ts, check_names=False)

assert ts.name is None
@@ -55,7 +55,7 @@ def test_from_csv(self, datetime_series, string_series):
with open(path, "w") as outfile:
outfile.write("1998-01-01|1.0\n1999-01-01|2.0")

series = self.read_csv(path, sep="|")
series = self.read_csv(path, sep="|", parse_dates=True)
check_series = Series(
{datetime(1998, 1, 1): 1.0, datetime(1999, 1, 1): 2.0}
)
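With parse_dates no longer baked into the helper's defaults, each round-trip that actually needs a DatetimeIndex opts in explicitly. A rough sketch of that pattern (hypothetical file name, standard read_csv/to_csv semantics assumed):

import pandas as pd

# Sketch with a hypothetical path: parse_dates=True asks read_csv to parse
# the index column, which the round-trip tests now request at the call site
# instead of relying on a shared default.
ts = pd.Series([1.0, 2.0], index=pd.to_datetime(["1998-01-01", "1999-01-01"]))
ts.to_csv("tmp_roundtrip.csv", header=False)
recons = pd.read_csv(
    "tmp_roundtrip.csv", index_col=0, header=None, parse_dates=True
).squeeze("columns")
print(recons.index.dtype)  # datetime64[ns]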