Open
Description
Pandas version checks
-
I have checked that this issue has not already been reported.
-
I have confirmed this bug exists on the latest version of pandas.
-
I have confirmed this bug exists on the main branch of pandas.
Reproducible Example
import numpy as np
import pandas as pd
import sys
# Raise the pandas display and Styler precision to 16 digits.
pd.set_option('display.precision', 16)
pd.set_option('styler.format.precision', 16)
# pd.describe_option()

# Exponents run from -limit to +limit in steps of one (2 * limit + 1 values).
limit = 17
exponents = np.linspace(
    -limit,
    limit,
    num=2 * limit + 1,
    endpoint=True,
    dtype='float64',
)
# The same mantissa scaled by every power of ten in `exponents`.
test_values = 1.2345678901234567890 * 10 ** exponents

# Column order matters for the JSON output: x, x_string, exponent.
df = pd.DataFrame({'x': test_values})
# Reference rendering of each value in scientific notation (18 decimals).
df['x_string'] = df['x'].map(lambda value: format(value, '.18e'))
df['exponent'] = exponents

# Serialise with the precision under test.
df.to_json('./bug_df_to_json.json', double_precision=15, indent=4)

# Report the environment used for the reproduction.
print(f"Pandas version: {pd.__version__}")
print(f"Numpy version: {np.__version__}")
print(f"Python version: {sys.version}")
# print(df)
#
# "1": 1.23456789012346e-16, # Precision 15
# "2": 0.000000000000001, # Precision 1
# "3": 0.000000000000012, # Precision 2
# "4": 0.000000000000123, # Precision 3
# "5": 0.000000000001235, # Precision 4
# "6": 0.000000000012346, # Precision 5
# "7": 0.000000000123457,
# "8": 0.000000001234568,
# "9": 0.000000012345679,
# "10": 0.000000123456789,
# "11": 0.00000123456789,
# "12": 0.000012345678901,
# "13": 0.000123456789012,
# "14": 0.001234567890123,
# "16": 0.123456789012346,
# "17": 1.234567890123457,
# "15": 0.012345678901235,
# "18": 12.345678901234567,
# "19": 123.456789012345666,
# "20": 1234.567890123456664,
# "21": 12345.678901234567093,
# "22": 123456.789012345674564,
# "23": 1234567.890123456716538,
# "24": 12345678.901234567165375,
# "25": 123456789.012345671653748,
# "26": 1234567890.123456716537476,
# "27": 12345678901.234567642211914,
# "28": 123456789012.345672607421875, # Precision 27
# "29": 1234567890123.456787109375,
# "30": 12345678901234.56640625,
# "31": 123456789012345.671875,
# "32": 1234567890123456.75, # Precision 18
import numpy as np
import pandas as pd
import sys
# Raise the pandas display and Styler precision to 16 digits.
pd.set_option('display.precision', 16)
pd.set_option('styler.format.precision', 16)
# pd.describe_option()

# Exponents run from -limit to +limit in steps of one (2 * limit + 1 values).
limit = 17
exponents = np.linspace(
    -limit,
    limit,
    num=2 * limit + 1,
    endpoint=True,
    dtype='float64',
)
# The same mantissa scaled by every power of ten in `exponents`.
test_values = 1.2345678901234567890 * 10 ** exponents

# Column order matters for the JSON output: x, x_string, exponent.
df = pd.DataFrame({'x': test_values})
# Reference rendering of each value in scientific notation (18 decimals).
df['x_string'] = df['x'].map(lambda value: format(value, '.18e'))
df['exponent'] = exponents

# Serialise with the precision under test.
df.to_json('./bug_df_to_json.json', double_precision=15, indent=4)

# Report the environment used for the reproduction.
print(f"Pandas version: {pd.__version__}")
print(f"Numpy version: {np.__version__}")
print(f"Python version: {sys.version}")
# print(df)
#
# "1": 1.23456789012346e-16, # Precision 15
# "2": 0.000000000000001, # Precision 1
# "3": 0.000000000000012, # Precision 2
# "4": 0.000000000000123, # Precision 3
# "5": 0.000000000001235, # Precision 4
# "6": 0.000000000012346, # Precision 5
# "7": 0.000000000123457,
# "8": 0.000000001234568,
# "9": 0.000000012345679,
# "10": 0.000000123456789,
# "11": 0.00000123456789,
# "12": 0.000012345678901,
# "13": 0.000123456789012,
# "14": 0.001234567890123,
# "16": 0.123456789012346,
# "17": 1.234567890123457,
# "15": 0.012345678901235,
# "18": 12.345678901234567,
# "19": 123.456789012345666,
# "20": 1234.567890123456664,
# "21": 12345.678901234567093,
# "22": 123456.789012345674564,
# "23": 1234567.890123456716538,
# "24": 12345678.901234567165375,
# "25": 123456789.012345671653748,
# "26": 1234567890.123456716537476,
# "27": 12345678901.234567642211914,
# "28": 123456789012.345672607421875, # Precision 27
# "29": 1234567890123.456787109375,
# "30": 12345678901234.56640625,
# "31": 123456789012345.671875,
# "32": 1234567890123456.75, # Precision 18
Issue Description
The floats written to JSON are formatted with a fixed number of decimal places (see the output above), so the trailing digits are truncated and precision is lost. The floats could instead be written in scientific notation, so that no precision would be lost.
Possible location of the code responsible for this issue:
https://github.com/pandas-dev/pandas/blob/main/pandas/_libs/src/vendored/ujson/lib/ultrajsonenc.c
Expected Behavior
Floats should be written to JSON in scientific notation, so that no precision is lost.
Installed Versions
INSTALLED VERSIONS
------------------
commit : d9cdd2e
python : 3.11.9.final.0
python-bits : 64
OS : Windows
OS-release : 10
Version : 10.0.22631
machine : AMD64
processor : Intel64 Family 6 Model 170 Stepping 4, GenuineIntel
byteorder : little
LC_ALL : None
LANG : None
LOCALE : de_DE.cp1252
pandas : 2.2.2
numpy : 1.26.4
pytz : 2024.1
dateutil : 2.9.0.post0
setuptools : 68.0.0
pip : 24.0
Cython : 3.0.9
pytest : None
hypothesis : None
sphinx : None
blosc : None
feather : None
xlsxwriter : 3.2.0
lxml.etree : 5.1.0
html5lib : None
pymysql : None
psycopg2 : None
jinja2 : 3.1.3
IPython : 8.12.3
pandas_datareader : None
adbc-driver-postgresql: None
adbc-driver-sqlite : None
bs4 : 4.12.3
bottleneck : None
dataframe-api-compat : None
fastparquet : None
fsspec : 2023.6.0
gcsfs : None
matplotlib : 3.8.3
numba : 0.59.1
numexpr : None
odfpy : None
openpyxl : 3.1.2
pandas_gbq : None
pyarrow : None
pyreadstat : None
python-calamine : None
pyxlsb : 1.0.10
s3fs : None
scipy : 1.12.0
sqlalchemy : None
tables : None
tabulate : 0.9.0
xarray : 2024.2.0
xlrd : None
zstandard : 0.22.0
tzdata : 2024.1
qtpy : 2.4.1
pyqt5 : None