Description
Pandas version checks
- I have checked that this issue has not already been reported.
- I have confirmed this bug exists on the latest version of pandas.
- I have confirmed this bug exists on the main branch of pandas.
Reproducible Example
# The example runs using pandas 1.5.3 but crashes using 2.0.0
json = r'{"schema": {"fields": [{"name": "model", "type": "string"}, {"name": "category", "type": "string"}, {"name": "error_no", "type": "integer"}, {"name": "time", "type": "datetime"}, {"name": "message", "type": "string"}], "primaryKey": ["model", "category", "error_no"], "pandas_version": "1.4.0"}, "data": [{"model": "modelsearch_candidate2", "category": "WARNING", "error_no": 0, "time": "2022-09-12T11:42:33.330Z", "message": "PARAMETER ESTIMATE IS NEAR ITS BOUNDARY"}, {"model": "modelsearch_candidate4", "category": "WARNING", "error_no": 0, "time": "2022-09-12T11:42:33.330Z", "message": "PARAMETER ESTIMATE IS NEAR ITS BOUNDARY"}]}'
import pandas as pd
df = pd.read_json(json, typ='frame', orient='table', precise_float=True)
Issue Description
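I serialized a pandas DataFrame to JSON using pandas 1.4.0. The DataFrame contains a datetime
column whose values are written as UTC timestamps with a trailing "Z" (e.g. "2022-09-12T11:42:33.330Z").
With pandas 1.5.3 this JSON could be read back without problems, but pandas 2.0.0 raises a TypeError (see the traceback below) when trying to read it.
The problem appears to be that parse_table_schema casts the columns to the dtypes derived from the schema with df.astype(dtypes) at _table_schema.py:370.
The "Z"-suffixed strings are parsed as timezone-aware datetimes, and pandas 2.0.0 refuses to convert those to a timezone-naive datetime dtype via .astype, which results in the timezone error.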
TypeError Traceback (most recent call last)
Cell In[4], line 1
----> 1 df = pd.read_json(json, typ='frame', orient='table', precise_float=True)
File ~/devel/pharmpy/.tox/py311/lib/python3.11/site-packages/pandas/io/json/_json.py:784, in read_json(path_or_buf, orient, typ, dtype, convert_axes, convert_dates,
keep_default_dates, precise_float, date_unit, encoding, encoding_errors, lines, chunksize, compression, nrows, storage_options, dtype_backend, engine)
782 return json_reader
783 else:
--> 784 return json_reader.read()
File ~/devel/pharmpy/.tox/py311/lib/python3.11/site-packages/pandas/io/json/_json.py:975, in JsonReader.read(self)
973 obj = self._get_object_parser(self._combine_lines(data_lines))
974 else:
--> 975 obj = self._get_object_parser(self.data)
976 if self.dtype_backend is not lib.no_default:
977 return obj.convert_dtypes(
978 infer_objects=False, dtype_backend=self.dtype_backend
979 )
File ~/devel/pharmpy/.tox/py311/lib/python3.11/site-packages/pandas/io/json/_json.py:1001, in JsonReader._get_object_parser(self, json)
999 obj = None
1000 if typ == "frame":
-> 1001 obj = FrameParser(json, **kwargs).parse()
1003 if typ == "series" or obj is None:
1004 if not isinstance(dtype, bool):
File ~/devel/pharmpy/.tox/py311/lib/python3.11/site-packages/pandas/io/json/_json.py:1134, in Parser.parse(self)
1133 def parse(self):
-> 1134 self._parse()
1136 if self.obj is None:
1137 return None
File ~/devel/pharmpy/.tox/py311/lib/python3.11/site-packages/pandas/io/json/_json.py:1344, in FrameParser._parse(self)
1338 self.obj = DataFrame.from_dict(
1339 loads(json, precise_float=self.precise_float),
1340 dtype=None,
1341 orient="index",
1342 )
1343 elif orient == "table":
-> 1344 self.obj = parse_table_schema(json, precise_float=self.precise_float)
1345 else:
1346 self.obj = DataFrame(
1347 loads(json, precise_float=self.precise_float), dtype=None
1348 )
File ~/devel/pharmpy/.tox/py311/lib/python3.11/site-packages/pandas/io/json/_table_schema.py:370, in parse_table_schema(json, precise_float)
365 if "timedelta64" in dtypes.values():
366 raise NotImplementedError(
367 'table="orient" can not yet read ISO-formatted Timedelta data'
368 )
--> 370 df = df.astype(dtypes)
372 if "primaryKey" in table["schema"]:
373 df = df.set_index(table["schema"]["primaryKey"])
File ~/devel/pharmpy/.tox/py311/lib/python3.11/site-packages/pandas/core/generic.py:6305, in NDFrame.astype(self, dtype, copy, errors)
6303 else:
6304 try:
-> 6305 res_col = col.astype(dtype=cdt, copy=copy, errors=errors)
6306 except ValueError as ex:
6307 ex.args = (
6308 f"{ex}: Error while type casting for column '{col_name}'",
6309 )
File ~/devel/pharmpy/.tox/py311/lib/python3.11/site-packages/pandas/core/generic.py:6324, in NDFrame.astype(self, dtype, copy, errors)
6317 results = [
6318 self.iloc[:, i].astype(dtype, copy=copy)
6319 for i in range(len(self.columns))
6320 ]
6322 else:
6323 # else, only a single dtype is given
-> 6324 new_data = self._mgr.astype(dtype=dtype, copy=copy, errors=errors)
6325 return self._constructor(new_data).__finalize__(self, method="astype")
6327 # GH 33113: handle empty frame or series
File ~/devel/pharmpy/.tox/py311/lib/python3.11/site-packages/pandas/core/internals/managers.py:451, in BaseBlockManager.astype(self, dtype, copy, errors)
448 elif using_copy_on_write():
449 copy = False
--> 451 return self.apply(
452 "astype",
453 dtype=dtype,
454 copy=copy,
455 errors=errors,
456 using_cow=using_copy_on_write(),
457 )
File ~/devel/pharmpy/.tox/py311/lib/python3.11/site-packages/pandas/core/internals/managers.py:352, in BaseBlockManager.apply(self, f, align_keys, **kwargs)
350 applied = b.apply(f, **kwargs)
351 else:
--> 352 applied = getattr(b, f)(**kwargs)
353 result_blocks = extend_blocks(applied, result_blocks)
355 out = type(self).from_blocks(result_blocks, self.axes)
File ~/devel/pharmpy/.tox/py311/lib/python3.11/site-packages/pandas/core/internals/blocks.py:511, in Block.astype(self, dtype, copy, errors, using_cow)
491 """
492 Coerce to the new dtype.
493
(...)
507 Block
508 """
509 values = self.values
--> 511 new_values = astype_array_safe(values, dtype, copy=copy, errors=errors)
513 new_values = maybe_coerce_values(new_values)
515 refs = None
File ~/devel/pharmpy/.tox/py311/lib/python3.11/site-packages/pandas/core/dtypes/astype.py:242, in astype_array_safe(values, dtype, copy, errors)
239 dtype = dtype.numpy_dtype
241 try:
--> 242 new_values = astype_array(values, dtype, copy=copy)
243 except (ValueError, TypeError):
244 # e.g. _astype_nansafe can fail on object-dtype of strings
245 # trying to convert to float
246 if errors == "ignore":
File ~/devel/pharmpy/.tox/py311/lib/python3.11/site-packages/pandas/core/dtypes/astype.py:187, in astype_array(values, dtype, copy)
184 values = values.astype(dtype, copy=copy)
186 else:
--> 187 values = _astype_nansafe(values, dtype, copy=copy)
189 # in pandas we don't store numpy str dtypes, so convert to object
190 if isinstance(dtype, np.dtype) and issubclass(values.dtype.type, str):
File ~/devel/pharmpy/.tox/py311/lib/python3.11/site-packages/pandas/core/dtypes/astype.py:116, in _astype_nansafe(arr, dtype, copy, skipna)
114 dti = to_datetime(arr.ravel())
115 dta = dti._data.reshape(arr.shape)
--> 116 return dta.astype(dtype, copy=False)._ndarray
118 elif is_timedelta64_dtype(dtype):
119 from pandas.core.construction import ensure_wrapped_if_datetimelike
File ~/devel/pharmpy/.tox/py311/lib/python3.11/site-packages/pandas/core/arrays/datetimes.py:682, in DatetimeArray.astype(self, dtype, copy)
676 # TODO: preserve freq?
678 elif self.tz is not None and is_datetime64_dtype(dtype):
679 # pre-2.0 behavior for DTA/DTI was
680 # values.tz_convert("UTC").tz_localize(None), which did not match
681 # the Series behavior
--> 682 raise TypeError(
683 "Cannot use .astype to convert from timezone-aware dtype to "
684 "timezone-naive dtype. Use obj.tz_localize(None) or "
685 "obj.tz_convert('UTC').tz_localize(None) instead."
686 )
688 elif (
689 self.tz is None
690 and is_datetime64_dtype(dtype)
691 and dtype != self.dtype
692 and is_unitless(dtype)
693 ):
694 raise TypeError(
695 "Casting to unit-less dtype 'datetime64' is not supported. "
696 "Pass e.g. 'datetime64[ns]' instead."
697 )
TypeError: Cannot use .astype to convert from timezone-aware dtype to timezone-naive dtype. Use obj.tz_localize(None) or obj.tz_convert('UTC').tz_localize(None) instead.
Expected Behavior
I expect it to still be possible to read this JSON using pandas 2.0.0, as it was with pandas 1.5.3.
Installed Versions
I got an error when running pd.show_versions()
tests/tools/test_run.py:173: in test_retrieve_models
pd.show_versions()
.tox/py311/lib/python3.11/site-packages/pandas/util/_print_versions.py:109: in show_versions
deps = _get_dependency_info()
.tox/py311/lib/python3.11/site-packages/pandas/util/_print_versions.py:88: in _get_dependency_info
mod = import_optional_dependency(modname, errors="ignore")
.tox/py311/lib/python3.11/site-packages/pandas/compat/_optional.py:142: in import_optional_dependency
module = importlib.import_module(name)
/usr/local/lib/python3.11/importlib/__init__.py:126: in import_module
return _bootstrap._gcd_import(name[level:], package, level)
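<frozen importlib._bootstrap>:1206: in _gcd_import
???
<frozen importlib._bootstrap>:1178: in _find_and_load
???
<frozen importlib._bootstrap>:1140: in _find_and_load_unlocked
???
<frozen importlib._bootstrap>:1080: in _find_spec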
???
.tox/py311/lib/python3.11/site-packages/_distutils_hack/__init__.py:97: in find_spec
return method()
.tox/py311/lib/python3.11/site-packages/_distutils_hack/__init__.py:147: in spec_for_pip
clear_distutils()
.tox/py311/lib/python3.11/site-packages/_distutils_hack/__init__.py:33: in clear_distutils
warnings.warn("Setuptools is replacing distutils.")
E UserWarning: Setuptools is replacing distutils.