Skip to content

Commit dba0f66

Browse files
authored
ERR: Check that dtype_backend is valid (#51871)
1 parent 1a7232a commit dba0f66

28 files changed

+188
-7
lines changed

pandas/core/generic.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,7 @@
9696
from pandas.util._decorators import doc
9797
from pandas.util._exceptions import find_stack_level
9898
from pandas.util._validators import (
99+
check_dtype_backend,
99100
validate_ascending,
100101
validate_bool_kwarg,
101102
validate_fillna_kwargs,
@@ -6590,8 +6591,8 @@ def convert_dtypes(
65906591
65916592
.. versionadded:: 1.2.0
65926593
dtype_backend : {"numpy_nullable", "pyarrow"}, default "numpy_nullable"
6593-
Which dtype_backend to use, e.g. whether a DataFrame should have NumPy
6594-
arrays, nullable dtypes are used for all dtypes that have a nullable
6594+
Which dtype_backend to use, e.g. whether a DataFrame should use nullable
6595+
dtypes for all dtypes that have a nullable
65956596
implementation when "numpy_nullable" is set, pyarrow is used for all
65966597
dtypes if "pyarrow" is set.
65976598
@@ -6710,6 +6711,7 @@ def convert_dtypes(
67106711
2 <NA>
67116712
dtype: string
67126713
"""
6714+
check_dtype_backend(dtype_backend)
67136715
if self.ndim == 1:
67146716
return self._convert_dtypes(
67156717
infer_objects,

pandas/core/internals/construction.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -980,7 +980,7 @@ def convert_object_array(
980980
----------
981981
content: List[np.ndarray]
982982
dtype: np.dtype or ExtensionDtype
983-
dtype_backend: Controls if nullable dtypes are returned.
983+
dtype_backend: Controls if nullable/pyarrow dtypes are returned.
984984
coerce_float: Cast floats that are integers to int.
985985
986986
Returns

pandas/core/tools/numeric.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import numpy as np
99

1010
from pandas._libs import lib
11+
from pandas.util._validators import check_dtype_backend
1112

1213
from pandas.core.dtypes.cast import maybe_downcast_numeric
1314
from pandas.core.dtypes.common import (
@@ -166,6 +167,8 @@ def to_numeric(
166167
if errors not in ("ignore", "raise", "coerce"):
167168
raise ValueError("invalid error value specified")
168169

170+
check_dtype_backend(dtype_backend)
171+
169172
is_series = False
170173
is_index = False
171174
is_scalars = False

pandas/io/clipboards.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
from pandas._libs import lib
99
from pandas.util._exceptions import find_stack_level
10+
from pandas.util._validators import check_dtype_backend
1011

1112
from pandas.core.dtypes.generic import ABCDataFrame
1213

@@ -58,6 +59,8 @@ def read_clipboard(
5859
if encoding is not None and encoding.lower().replace("-", "") != "utf8":
5960
raise NotImplementedError("reading from clipboard only supports utf-8 encoding")
6061

62+
check_dtype_backend(dtype_backend)
63+
6164
from pandas.io.clipboard import clipboard_get
6265
from pandas.io.parsers import read_csv
6366

pandas/io/excel/_base.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
Appender,
3737
doc,
3838
)
39+
from pandas.util._validators import check_dtype_backend
3940

4041
from pandas.core.dtypes.common import (
4142
is_bool,
@@ -472,6 +473,8 @@ def read_excel(
472473
storage_options: StorageOptions = None,
473474
dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default,
474475
) -> DataFrame | dict[IntStrT, DataFrame]:
476+
check_dtype_backend(dtype_backend)
477+
475478
should_close = False
476479
if not isinstance(io, ExcelFile):
477480
should_close = True

pandas/io/feather_format.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
from pandas._libs import lib
1111
from pandas.compat._optional import import_optional_dependency
1212
from pandas.util._decorators import doc
13+
from pandas.util._validators import check_dtype_backend
1314

1415
import pandas as pd
1516
from pandas.core.api import DataFrame
@@ -105,6 +106,8 @@ def read_feather(
105106
import_optional_dependency("pyarrow")
106107
from pyarrow import feather
107108

109+
check_dtype_backend(dtype_backend)
110+
108111
with get_handle(
109112
path, "rb", storage_options=storage_options, is_text=False
110113
) as handles:

pandas/io/html.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
AbstractMethodError,
2525
EmptyDataError,
2626
)
27+
from pandas.util._validators import check_dtype_backend
2728

2829
from pandas.core.dtypes.common import is_list_like
2930

@@ -1170,6 +1171,7 @@ def read_html(
11701171
f'"{extract_links}"'
11711172
)
11721173
validate_header_arg(header)
1174+
check_dtype_backend(dtype_backend)
11731175

11741176
io = stringify_path(io)
11751177

pandas/io/json/_json.py

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
from pandas.compat._optional import import_optional_dependency
3030
from pandas.errors import AbstractMethodError
3131
from pandas.util._decorators import doc
32+
from pandas.util._validators import check_dtype_backend
3233

3334
from pandas.core.dtypes.common import (
3435
ensure_str,
@@ -747,6 +748,8 @@ def read_json(
747748
if orient == "table" and convert_axes:
748749
raise ValueError("cannot pass both convert_axes and orient='table'")
749750

751+
check_dtype_backend(dtype_backend)
752+
750753
if dtype is None and orient != "table":
751754
# error: Incompatible types in assignment (expression has type "bool", variable
752755
# has type "Union[ExtensionDtype, str, dtype[Any], Type[str], Type[float],
@@ -947,14 +950,18 @@ def read(self) -> DataFrame | Series:
947950
if self.engine == "pyarrow":
948951
pyarrow_json = import_optional_dependency("pyarrow.json")
949952
pa_table = pyarrow_json.read_json(self.data)
953+
954+
mapping: type[ArrowDtype] | None | Callable
950955
if self.dtype_backend == "pyarrow":
951-
return pa_table.to_pandas(types_mapper=ArrowDtype)
956+
mapping = ArrowDtype
952957
elif self.dtype_backend == "numpy_nullable":
953958
from pandas.io._util import _arrow_dtype_mapping
954959

955-
mapping = _arrow_dtype_mapping()
956-
return pa_table.to_pandas(types_mapper=mapping.get)
957-
return pa_table.to_pandas()
960+
mapping = _arrow_dtype_mapping().get
961+
else:
962+
mapping = None
963+
964+
return pa_table.to_pandas(types_mapper=mapping)
958965
elif self.engine == "ujson":
959966
if self.lines:
960967
if self.chunksize:

pandas/io/orc.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
from pandas._libs import lib
1313
from pandas.compat import pa_version_under8p0
1414
from pandas.compat._optional import import_optional_dependency
15+
from pandas.util._validators import check_dtype_backend
1516

1617
from pandas.core.dtypes.common import (
1718
is_categorical_dtype,
@@ -98,6 +99,8 @@ def read_orc(
9899

99100
orc = import_optional_dependency("pyarrow.orc")
100101

102+
check_dtype_backend(dtype_backend)
103+
101104
with get_handle(path, "rb", is_text=False) as handles:
102105
source = handles.handle
103106
if is_fsspec_url(path) and filesystem is None:

pandas/io/parquet.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
from pandas.errors import AbstractMethodError
1717
from pandas.util._decorators import doc
1818
from pandas.util._exceptions import find_stack_level
19+
from pandas.util._validators import check_dtype_backend
1920

2021
import pandas as pd
2122
from pandas import (
@@ -515,6 +516,7 @@ def read_parquet(
515516
DataFrame
516517
"""
517518
impl = get_engine(engine)
519+
518520
if use_nullable_dtypes is not lib.no_default:
519521
msg = (
520522
"The argument 'use_nullable_dtypes' is deprecated and will be removed "
@@ -527,6 +529,7 @@ def read_parquet(
527529
warnings.warn(msg, FutureWarning, stacklevel=find_stack_level())
528530
else:
529531
use_nullable_dtypes = False
532+
check_dtype_backend(dtype_backend)
530533

531534
return impl.read(
532535
path,

pandas/io/parsers/readers.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
)
3535
from pandas.util._decorators import Appender
3636
from pandas.util._exceptions import find_stack_level
37+
from pandas.util._validators import check_dtype_backend
3738

3839
from pandas.core.dtypes.common import (
3940
is_file_like,
@@ -1366,6 +1367,8 @@ def read_fwf(
13661367
kwds["colspecs"] = colspecs
13671368
kwds["infer_nrows"] = infer_nrows
13681369
kwds["engine"] = "python-fwf"
1370+
1371+
check_dtype_backend(dtype_backend)
13691372
kwds["dtype_backend"] = dtype_backend
13701373
return _read(filepath_or_buffer, kwds)
13711374

@@ -2019,6 +2022,8 @@ def _refine_defaults_read(
20192022
else:
20202023
raise ValueError(f"Argument {on_bad_lines} is invalid for on_bad_lines")
20212024

2025+
check_dtype_backend(dtype_backend)
2026+
20222027
kwds["dtype_backend"] = dtype_backend
20232028

20242029
return kwds

pandas/io/spss.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
from pandas._libs import lib
99
from pandas.compat._optional import import_optional_dependency
10+
from pandas.util._validators import check_dtype_backend
1011

1112
from pandas.core.dtypes.inference import is_list_like
1213

@@ -52,6 +53,7 @@ def read_spss(
5253
DataFrame
5354
"""
5455
pyreadstat = import_optional_dependency("pyreadstat")
56+
check_dtype_backend(dtype_backend)
5557

5658
if usecols is not None:
5759
if not is_list_like(usecols):

pandas/io/sql.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
DatabaseError,
4040
)
4141
from pandas.util._exceptions import find_stack_level
42+
from pandas.util._validators import check_dtype_backend
4243

4344
from pandas.core.dtypes.common import (
4445
is_datetime64tz_dtype,
@@ -327,6 +328,7 @@ def read_sql_table(
327328
>>> pd.read_sql_table('table_name', 'postgres:///db_name') # doctest:+SKIP
328329
"""
329330

331+
check_dtype_backend(dtype_backend)
330332
if dtype_backend is lib.no_default:
331333
dtype_backend = "numpy" # type: ignore[assignment]
332334
assert dtype_backend is not lib.no_default
@@ -459,6 +461,7 @@ def read_sql_query(
459461
parameter will be converted to UTC.
460462
"""
461463

464+
check_dtype_backend(dtype_backend)
462465
if dtype_backend is lib.no_default:
463466
dtype_backend = "numpy" # type: ignore[assignment]
464467
assert dtype_backend is not lib.no_default
@@ -624,6 +627,7 @@ def read_sql(
624627
1 1 2010-11-12
625628
"""
626629

630+
check_dtype_backend(dtype_backend)
627631
if dtype_backend is lib.no_default:
628632
dtype_backend = "numpy" # type: ignore[assignment]
629633
assert dtype_backend is not lib.no_default

pandas/io/xml.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
ParserError,
2020
)
2121
from pandas.util._decorators import doc
22+
from pandas.util._validators import check_dtype_backend
2223

2324
from pandas.core.dtypes.common import is_list_like
2425

@@ -1112,6 +1113,7 @@ def read_xml(
11121113
1 circle 360 NaN
11131114
2 triangle 180 3.0
11141115
"""
1116+
check_dtype_backend(dtype_backend)
11151117

11161118
return _parse(
11171119
path_or_buffer=path_or_buffer,

pandas/tests/frame/methods/test_convert_dtypes.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,3 +124,13 @@ def test_pyarrow_dtype_empty_object(self):
124124
expected = pd.DataFrame(columns=[0])
125125
result = expected.convert_dtypes(dtype_backend="pyarrow")
126126
tm.assert_frame_equal(result, expected)
127+
128+
def test_pyarrow_engine_lines_false(self):
129+
# GH 48893
130+
df = pd.DataFrame({"a": [1, 2, 3]})
131+
msg = (
132+
"dtype_backend numpy is invalid, only 'numpy_nullable' and "
133+
"'pyarrow' are allowed."
134+
)
135+
with pytest.raises(ValueError, match=msg):
136+
df.convert_dtypes(dtype_backend="numpy")

pandas/tests/io/json/test_pandas.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1944,6 +1944,14 @@ def test_read_json_nullable_series(self, string_storage, dtype_backend, orient):
19441944

19451945
tm.assert_series_equal(result, expected)
19461946

1947+
def test_invalid_dtype_backend(self):
1948+
msg = (
1949+
"dtype_backend numpy is invalid, only 'numpy_nullable' and "
1950+
"'pyarrow' are allowed."
1951+
)
1952+
with pytest.raises(ValueError, match=msg):
1953+
read_json("test", dtype_backend="numpy")
1954+
19471955

19481956
def test_invalid_engine():
19491957
# GH 48893

pandas/tests/io/parser/test_read_fwf.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1001,3 +1001,12 @@ def test_dtype_backend(string_storage, dtype_backend):
10011001
expected["i"] = ArrowExtensionArray(pa.array([None, None]))
10021002

10031003
tm.assert_frame_equal(result, expected)
1004+
1005+
1006+
def test_invalid_dtype_backend():
1007+
msg = (
1008+
"dtype_backend numpy is invalid, only 'numpy_nullable' and "
1009+
"'pyarrow' are allowed."
1010+
)
1011+
with pytest.raises(ValueError, match=msg):
1012+
read_fwf("test", dtype_backend="numpy")

pandas/tests/io/parser/test_unsupported.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -200,3 +200,13 @@ def test_invalid_file_inputs(request, all_parsers):
200200

201201
with pytest.raises(ValueError, match="Invalid"):
202202
parser.read_csv([])
203+
204+
205+
def test_invalid_dtype_backend(all_parsers):
206+
parser = all_parsers
207+
msg = (
208+
"dtype_backend numpy is invalid, only 'numpy_nullable' and "
209+
"'pyarrow' are allowed."
210+
)
211+
with pytest.raises(ValueError, match=msg):
212+
parser.read_csv("test", dtype_backend="numpy")

pandas/tests/io/test_clipboard.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -467,3 +467,11 @@ def test_read_clipboard_dtype_backend(
467467
expected["g"] = ArrowExtensionArray(pa.array([None, None]))
468468

469469
tm.assert_frame_equal(result, expected)
470+
471+
def test_invalid_dtype_backend(self):
472+
msg = (
473+
"dtype_backend numpy is invalid, only 'numpy_nullable' and "
474+
"'pyarrow' are allowed."
475+
)
476+
with pytest.raises(ValueError, match=msg):
477+
read_clipboard(dtype_backend="numpy")

pandas/tests/io/test_feather.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -212,3 +212,14 @@ def test_read_feather_dtype_backend(self, string_storage, dtype_backend):
212212
def test_int_columns_and_index(self):
213213
df = pd.DataFrame({"a": [1, 2, 3]}, index=pd.Index([3, 4, 5], name="test"))
214214
self.check_round_trip(df)
215+
216+
def test_invalid_dtype_backend(self):
217+
msg = (
218+
"dtype_backend numpy is invalid, only 'numpy_nullable' and "
219+
"'pyarrow' are allowed."
220+
)
221+
df = pd.DataFrame({"int": list(range(1, 4))})
222+
with tm.ensure_clean("tmp.feather") as path:
223+
df.to_feather(path)
224+
with pytest.raises(ValueError, match=msg):
225+
read_feather(path, dtype_backend="numpy")

pandas/tests/io/test_html.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1465,3 +1465,11 @@ def test_extract_links_all_no_header(self):
14651465
result = self.read_html(data, extract_links="all")[0]
14661466
expected = DataFrame([[("Google.com", "https://google.com")]])
14671467
tm.assert_frame_equal(result, expected)
1468+
1469+
def test_invalid_dtype_backend(self):
1470+
msg = (
1471+
"dtype_backend numpy is invalid, only 'numpy_nullable' and "
1472+
"'pyarrow' are allowed."
1473+
)
1474+
with pytest.raises(ValueError, match=msg):
1475+
read_html("test", dtype_backend="numpy")

0 commit comments

Comments (0)