
BUG: json that could be read by pandas 1.5.3 cannot be read by 2.0.0 #52595

Open
Description

@rikardn

Pandas version checks

  • I have checked that this issue has not already been reported.

  • I have confirmed this bug exists on the latest version of pandas.

  • I have confirmed this bug exists on the main branch of pandas.

Reproducible Example

# The example runs using pandas 1.5.3 but crashes using 2.0.0

json = r'{"schema": {"fields": [{"name": "model", "type": "string"}, {"name": "category", "type": "string"}, {"name": "error_no", "type": "integer"}, {"name": "time", "type": "datetime"}, {"name": "message", "type": "string"}], "primaryKey": ["model", "category", "error_no"], "pandas_version": "1.4.0"}, "data": [{"model": "modelsearch_candidate2", "category": "WARNING", "error_no": 0, "time": "2022-09-12T11:42:33.330Z", "message": "PARAMETER ESTIMATE IS NEAR ITS BOUNDARY"}, {"model": "modelsearch_candidate4", "category": "WARNING", "error_no": 0, "time": "2022-09-12T11:42:33.330Z", "message": "PARAMETER ESTIMATE IS NEAR ITS BOUNDARY"}]}'

import pandas as pd

df = pd.read_json(json, typ='frame', orient='table', precise_float=True)

Issue Description

I have serialized a pandas DataFrame to JSON using pandas 1.4.0. The DataFrame contains a datetime column. Pandas 1.5.3 could read the JSON back in fine, but pandas 2.0.0 raises an error (see below) when trying to read it.
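For context, here is a sketch of how a payload like this could have been produced. The frame below is reconstructed from the schema in the reproducible example; the to_json(orient="table") call with timezone-naive timestamps is my best guess, not the actual serialization code.

import pandas as pd

# Reconstruction of the original frame: the schema's primaryKey columns as a
# MultiIndex and an assumed timezone-naive datetime column.
df = pd.DataFrame(
    {
        "model": ["modelsearch_candidate2", "modelsearch_candidate4"],
        "category": ["WARNING", "WARNING"],
        "error_no": [0, 0],
        "time": pd.to_datetime(["2022-09-12T11:42:33.330"] * 2),
        "message": ["PARAMETER ESTIMATE IS NEAR ITS BOUNDARY"] * 2,
    }
).set_index(["model", "category", "error_no"])

# Presumably serialized along these lines under pandas 1.4.
json = df.to_json(orient="table")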

The problem is that pandas tries to change the dtype of the datetime column at _table_schema.py:370 (df = df.astype(dtypes)), which ends up raising the timezone error below.
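The same failure can be reproduced outside of read_json. This is a minimal sketch of the cast that _table_schema.py ends up performing (the timestamp literal is taken from the payload above):

import pandas as pd

# The trailing "Z" makes the parsed timestamps timezone-aware (UTC); in
# pandas 2.0 casting a timezone-aware value to naive datetime64[ns] raises
# instead of dropping the timezone as pre-2.0 DatetimeArray.astype did.
s = pd.Series(["2022-09-12T11:42:33.330Z"], dtype=object)
# s.astype("datetime64[ns]")            # TypeError under pandas 2.0.0
pd.to_datetime(s).dt.tz_localize(None)  # the conversion the error message suggests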

TypeError                                 Traceback (most recent call last)
Cell In[4], line 1
----> 1 df = pd.read_json(json, typ='frame', orient='table', precise_float=True)

File ~/devel/pharmpy/.tox/py311/lib/python3.11/site-packages/pandas/io/json/_json.py:784, in read_json(path_or_buf, orient, typ, dtype, convert_axes, convert_dates, keep_default_dates, precise_float, date_unit, encoding, encoding_errors, lines, chunksize, compression, nrows, storage_options, dtype_backend, engine)
    782     return json_reader
    783 else:
--> 784     return json_reader.read()

File ~/devel/pharmpy/.tox/py311/lib/python3.11/site-packages/pandas/io/json/_json.py:975, in JsonReader.read(self)
    973         obj = self._get_object_parser(self._combine_lines(data_lines))
    974 else:
--> 975     obj = self._get_object_parser(self.data)
    976 if self.dtype_backend is not lib.no_default:
    977     return obj.convert_dtypes(
    978         infer_objects=False, dtype_backend=self.dtype_backend
    979     )

File ~/devel/pharmpy/.tox/py311/lib/python3.11/site-packages/pandas/io/json/_json.py:1001, in JsonReader._get_object_parser(self, json)
    999 obj = None
   1000 if typ == "frame":
-> 1001     obj = FrameParser(json, **kwargs).parse()
   1003 if typ == "series" or obj is None:
   1004     if not isinstance(dtype, bool):

File ~/devel/pharmpy/.tox/py311/lib/python3.11/site-packages/pandas/io/json/_json.py:1134, in Parser.parse(self)
   1133 def parse(self):
-> 1134     self._parse()
   1136     if self.obj is None:
   1137         return None

File ~/devel/pharmpy/.tox/py311/lib/python3.11/site-packages/pandas/io/json/_json.py:1344, in FrameParser._parse(self)
   1338     self.obj = DataFrame.from_dict(
   1339         loads(json, precise_float=self.precise_float),
   1340         dtype=None,
   1341         orient="index",
   1342     )
   1343 elif orient == "table":
-> 1344     self.obj = parse_table_schema(json, precise_float=self.precise_float)
   1345 else:
   1346     self.obj = DataFrame(
   1347         loads(json, precise_float=self.precise_float), dtype=None
   1348     )

File ~/devel/pharmpy/.tox/py311/lib/python3.11/site-packages/pandas/io/json/_table_schema.py:370, in parse_table_schema(json, precise_float)
    365 if "timedelta64" in dtypes.values():
    366     raise NotImplementedError(
    367         'table="orient" can not yet read ISO-formatted Timedelta data'
    368     )
--> 370 df = df.astype(dtypes)
    372 if "primaryKey" in table["schema"]:
    373     df = df.set_index(table["schema"]["primaryKey"])

File ~/devel/pharmpy/.tox/py311/lib/python3.11/site-packages/pandas/core/generic.py:6305, in NDFrame.astype(self, dtype, copy, errors)
   6303 else:
   6304     try:
-> 6305         res_col = col.astype(dtype=cdt, copy=copy, errors=errors)
   6306     except ValueError as ex:
   6307         ex.args = (
   6308             f"{ex}: Error while type casting for column '{col_name}'",
   6309         )

File ~/devel/pharmpy/.tox/py311/lib/python3.11/site-packages/pandas/core/generic.py:6324, in NDFrame.astype(self, dtype, copy, errors)
   6317     results = [
   6318         self.iloc[:, i].astype(dtype, copy=copy)
   6319         for i in range(len(self.columns))
   6320     ]
   6322 else:
   6323     # else, only a single dtype is given
-> 6324     new_data = self._mgr.astype(dtype=dtype, copy=copy, errors=errors)
   6325     return self._constructor(new_data).__finalize__(self, method="astype")
   6327 # GH 33113: handle empty frame or series

File ~/devel/pharmpy/.tox/py311/lib/python3.11/site-packages/pandas/core/internals/managers.py:451, in BaseBlockManager.astype(self, dtype, copy, errors)
    448 elif using_copy_on_write():
    449     copy = False
--> 451 return self.apply(
    452     "astype",
    453     dtype=dtype,
    454     copy=copy,
    455     errors=errors,
    456     using_cow=using_copy_on_write(),
    457 )

File ~/devel/pharmpy/.tox/py311/lib/python3.11/site-packages/pandas/core/internals/managers.py:352, in BaseBlockManager.apply(self, f, align_keys, **kwargs)
    350         applied = b.apply(f, **kwargs)
    351     else:
--> 352         applied = getattr(b, f)(**kwargs)
    353     result_blocks = extend_blocks(applied, result_blocks)
    355 out = type(self).from_blocks(result_blocks, self.axes)

File ~/devel/pharmpy/.tox/py311/lib/python3.11/site-packages/pandas/core/internals/blocks.py:511, in Block.astype(self, dtype, copy, errors, using_cow)
    491 """
    492 Coerce to the new dtype.
    493 
   (...)
    507 Block
    508 """
    509 values = self.values
--> 511 new_values = astype_array_safe(values, dtype, copy=copy, errors=errors)
    513 new_values = maybe_coerce_values(new_values)
    515 refs = None

File ~/devel/pharmpy/.tox/py311/lib/python3.11/site-packages/pandas/core/dtypes/astype.py:242, in astype_array_safe(values, dtype, copy, errors)
    239     dtype = dtype.numpy_dtype
    241 try:
--> 242     new_values = astype_array(values, dtype, copy=copy)
    243 except (ValueError, TypeError):
    244     # e.g. _astype_nansafe can fail on object-dtype of strings
    245     #  trying to convert to float
    246     if errors == "ignore":

File ~/devel/pharmpy/.tox/py311/lib/python3.11/site-packages/pandas/core/dtypes/astype.py:187, in astype_array(values, dtype, copy)
    184     values = values.astype(dtype, copy=copy)
    186 else:
--> 187     values = _astype_nansafe(values, dtype, copy=copy)
    189 # in pandas we don't store numpy str dtypes, so convert to object
    190 if isinstance(dtype, np.dtype) and issubclass(values.dtype.type, str):

File ~/devel/pharmpy/.tox/py311/lib/python3.11/site-packages/pandas/core/dtypes/astype.py:116, in _astype_nansafe(arr, dtype, copy, skipna)
    114     dti = to_datetime(arr.ravel())
    115     dta = dti._data.reshape(arr.shape)
--> 116     return dta.astype(dtype, copy=False)._ndarray
    118 elif is_timedelta64_dtype(dtype):
    119     from pandas.core.construction import ensure_wrapped_if_datetimelike

File ~/devel/pharmpy/.tox/py311/lib/python3.11/site-packages/pandas/core/arrays/datetimes.py:682, in DatetimeArray.astype(self, dtype, copy)
    676     # TODO: preserve freq?
    678 elif self.tz is not None and is_datetime64_dtype(dtype):
    679     # pre-2.0 behavior for DTA/DTI was
    680     #  values.tz_convert("UTC").tz_localize(None), which did not match
    681     #  the Series behavior
--> 682     raise TypeError(
    683         "Cannot use .astype to convert from timezone-aware dtype to "
    684         "timezone-naive dtype. Use obj.tz_localize(None) or "
    685         "obj.tz_convert('UTC').tz_localize(None) instead."
    686     )
    688 elif (
    689     self.tz is None
    690     and is_datetime64_dtype(dtype)
    691     and dtype != self.dtype
    692     and is_unitless(dtype)
    693 ):
    694     raise TypeError(
    695         "Casting to unit-less dtype 'datetime64' is not supported. "
    696         "Pass e.g. 'datetime64[ns]' instead."
    697     )

TypeError: Cannot use .astype to convert from timezone-aware dtype to timezone-naive dtype. Use obj.tz_localize(None) or obj.tz_convert('UTC').tz_localize(None) instead.


Expected Behavior

I expect the JSON to still be readable with pandas 2.0.0.
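In the meantime, a minimal sketch of a possible workaround (assuming the timestamps were meant to be timezone-naive; this is just an illustration, not an official fix): parse the payload manually and drop the timezone before setting the index.

import json as jsonlib  # avoid clashing with the `json` string from the example above

import pandas as pd

payload = jsonlib.loads(json)
df = pd.DataFrame(payload["data"])
# Drop the UTC timezone that pandas 2.0 refuses to cast away implicitly.
df["time"] = pd.to_datetime(df["time"]).dt.tz_localize(None)
df = df.set_index(payload["schema"]["primaryKey"])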

Installed Versions

I got an error when running pd.show_versions():

tests/tools/test_run.py:173: in test_retrieve_models
pd.show_versions()
.tox/py311/lib/python3.11/site-packages/pandas/util/_print_versions.py:109: in show_versions
deps = _get_dependency_info()
.tox/py311/lib/python3.11/site-packages/pandas/util/_print_versions.py:88: in _get_dependency_info
mod = import_optional_dependency(modname, errors="ignore")
.tox/py311/lib/python3.11/site-packages/pandas/compat/_optional.py:142: in import_optional_dependency
module = importlib.import_module(name)
/usr/local/lib/python3.11/importlib/__init__.py:126: in import_module
return _bootstrap._gcd_import(name[level:], package, level)
<frozen importlib._bootstrap>:1206: in _gcd_import
???
<frozen importlib._bootstrap>:1178: in _find_and_load
???
<frozen importlib._bootstrap>:1140: in _find_and_load_unlocked
???
<frozen importlib._bootstrap>:1080: in _find_spec
???
.tox/py311/lib/python3.11/site-packages/_distutils_hack/__init__.py:97: in find_spec
return method()
.tox/py311/lib/python3.11/site-packages/_distutils_hack/__init__.py:147: in spec_for_pip
clear_distutils()
.tox/py311/lib/python3.11/site-packages/_distutils_hack/__init__.py:33: in clear_distutils
warnings.warn("Setuptools is replacing distutils.")
E UserWarning: Setuptools is replacing distutils.

Metadata

Labels

IO JSON (read_json, to_json, json_normalize), Regression (functionality that used to work in a prior pandas version)
