diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py
index e3656b6ff4bcc..3af63c781d499 100644
--- a/pandas/core/arrays/string_.py
+++ b/pandas/core/arrays/string_.py
@@ -14,6 +14,7 @@
     missing as libmissing,
 )
 from pandas._libs.arrays import NDArrayBacked
+from pandas._libs.lib import ensure_string_array
 from pandas.compat import pa_version_under7p0
 from pandas.compat.numpy import function as nv
 from pandas.util._decorators import doc
@@ -221,7 +222,7 @@ def __from_arrow__(
             arr = np.array([], dtype=object)
         else:
             arr = pyarrow.concat_arrays(chunks).to_numpy(zero_copy_only=False)
-            arr = lib.convert_nans_to_NA(arr)
+            arr = ensure_string_array(arr, na_value=libmissing.NA)
         # Bypass validation inside StringArray constructor, see GH#47781
         new_string_array = StringArray.__new__(StringArray)
         NDArrayBacked.__init__(
diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py
index ccba44b7f8fe4..b2d6e958020be 100644
--- a/pandas/tests/io/test_parquet.py
+++ b/pandas/tests/io/test_parquet.py
@@ -1,5 +1,6 @@
 """ test parquet compat """
 import datetime
+from decimal import Decimal
 from io import BytesIO
 import os
 import pathlib
@@ -16,6 +17,7 @@
 from pandas.compat.pyarrow import (
     pa_version_under7p0,
     pa_version_under8p0,
+    pa_version_under11p0,
     pa_version_under13p0,
 )
 
@@ -1111,6 +1113,18 @@ def test_string_inference(self, tmp_path, pa):
         )
         tm.assert_frame_equal(result, expected)
 
+    @pytest.mark.skipif(pa_version_under11p0, reason="not supported before 11.0")
+    def test_roundtrip_decimal(self, tmp_path, pa):
+        # GH#54768
+        import pyarrow as pa
+
+        path = tmp_path / "decimal.p"
+        df = pd.DataFrame({"a": [Decimal("123.00")]}, dtype="string[pyarrow]")
+        df.to_parquet(path, schema=pa.schema([("a", pa.decimal128(5))]))
+        result = read_parquet(path)
+        expected = pd.DataFrame({"a": ["123"]}, dtype="string[python]")
+        tm.assert_frame_equal(result, expected)
+
 
 class TestParquetFastParquet(Base):
     def test_basic(self, fp, df_full):