From 17fe351ef7fe41f78733f8eeca9e697668a184be Mon Sep 17 00:00:00 2001 From: Pedro Frigolet Date: Wed, 3 Apr 2024 18:41:52 +0100 Subject: [PATCH] BUG: fixed to_numeric loss in precision when converting decimal type to integer #57213 --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/core/tools/numeric.py | 13 ++++++++++++- pandas/tests/tools/test_to_numeric.py | 7 +++++++ 3 files changed, 20 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 4debd41de213fa..a6a2f07f69ef50 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -329,6 +329,7 @@ Performance improvements Bug fixes ~~~~~~~~~ - Fixed bug in :class:`SparseDtype` for equal comparison with na fill value. (:issue:`54770`) +- Fixed bug in :func:`to_numeric` that resulted in precision loss when converting decimal type to integer. (:issue:`57213`) - Fixed bug in :meth:`.DataFrameGroupBy.median` where nat values gave an incorrect result. (:issue:`57926`) - Fixed bug in :meth:`DataFrame.cumsum` which was raising ``IndexError`` if dtype is ``timedelta64[ns]`` (:issue:`57956`) - Fixed bug in :meth:`DataFrame.join` inconsistently setting result index name (:issue:`55815`) diff --git a/pandas/core/tools/numeric.py b/pandas/core/tools/numeric.py index 3d28a73df99d17..1b456b94c2ea0c 100644 --- a/pandas/core/tools/numeric.py +++ b/pandas/core/tools/numeric.py @@ -1,5 +1,6 @@ from __future__ import annotations +from decimal import Decimal from typing import ( TYPE_CHECKING, Literal, @@ -209,6 +210,7 @@ def to_numeric( values = values.view(np.int64) else: values = ensure_object(values) + old_values = values coerce_numeric = errors != "raise" values, new_mask = lib.maybe_convert_numeric( # type: ignore[call-overload] values, @@ -255,7 +257,16 @@ def to_numeric( for typecode in typecodes: dtype = np.dtype(typecode) if dtype.itemsize <= values.dtype.itemsize: - values = maybe_downcast_numeric(values, dtype) + try: + if (len(old_values) != 0) and isinstance( + old_values[0], Decimal + ): + if dtype.itemsize == values.dtype.itemsize: + values = np.array(old_values, dtype=dtype) + else: + values = maybe_downcast_numeric(values, dtype) + except NameError: + values = maybe_downcast_numeric(values, dtype) # successful conversion if values.dtype == dtype: diff --git a/pandas/tests/tools/test_to_numeric.py b/pandas/tests/tools/test_to_numeric.py index 585b7ca94f730d..a31309a11cb64a 100644 --- a/pandas/tests/tools/test_to_numeric.py +++ b/pandas/tests/tools/test_to_numeric.py @@ -904,3 +904,10 @@ def test_coerce_pyarrow_backend(): result = to_numeric(ser, errors="coerce", dtype_backend="pyarrow") expected = Series([1, 2, None], dtype=ArrowDtype(pa.int64())) tm.assert_series_equal(result, expected) + + +def test_decimal_precision(): + df = DataFrame({"column1": [decimal.Decimal("1" * 19)]}) + result = to_numeric(df["column1"], downcast="integer") + expected = df["column1"].astype("int64") + tm.assert_series_equal(result, expected)