REF: ujson cleanups (#54581)
* REF: use stdlib json

* remove unnecessary GET_TC calls

* REF: update dumps->ujson_dumps

* revert enum move
jbrockmendel authored Aug 17, 2023
1 parent 7c9ba89 commit 3b34c3b
Showing 5 changed files with 42 additions and 49 deletions.
28 changes: 15 additions & 13 deletions pandas/_libs/src/vendored/ujson/python/objToJSON.c
@@ -1318,6 +1318,7 @@ char **NpyArr_encodeLabels(PyArrayObject *labels, PyObjectEncoder *enc,
     } else if (PyDate_Check(item) || PyDelta_Check(item)) {
       is_datetimelike = 1;
       if (PyObject_HasAttrString(item, "_value")) {
+        // pd.Timestamp object or pd.NaT
         // see test_date_index_and_values for case with non-nano
         i8date = get_long_attr(item, "_value");
       } else {
@@ -1471,12 +1472,12 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) {
        }
        // Currently no way to pass longVal to iso function, so use
        // state management
-       GET_TC(tc)->longValue = longVal;
+       pc->longValue = longVal;
        tc->type = JT_UTF8;
      } else {
        NPY_DATETIMEUNIT base =
            ((PyObjectEncoder *)tc->encoder)->datetimeUnit;
-       GET_TC(tc)->longValue = NpyDateTimeToEpoch(longVal, base);
+       pc->longValue = NpyDateTimeToEpoch(longVal, base);
        tc->type = JT_LONG;
      }
    }
@@ -1497,9 +1498,9 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) {
   if (PyLong_Check(obj)) {
     tc->type = JT_LONG;
     int overflow = 0;
-    GET_TC(tc)->longValue = PyLong_AsLongLongAndOverflow(obj, &overflow);
+    pc->longValue = PyLong_AsLongLongAndOverflow(obj, &overflow);
     int err;
-    err = (GET_TC(tc)->longValue == -1) && PyErr_Occurred();
+    err = (pc->longValue == -1) && PyErr_Occurred();
 
     if (overflow) {
       tc->type = JT_BIGNUM;
@@ -1513,7 +1514,7 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) {
     if (npy_isnan(val) || npy_isinf(val)) {
       tc->type = JT_NULL;
     } else {
-      GET_TC(tc)->doubleValue = val;
+      pc->doubleValue = val;
       tc->type = JT_DOUBLE;
     }
     return;
@@ -1526,7 +1527,7 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) {
     tc->type = JT_UTF8;
     return;
   } else if (object_is_decimal_type(obj)) {
-    GET_TC(tc)->doubleValue = PyFloat_AsDouble(obj);
+    pc->doubleValue = PyFloat_AsDouble(obj);
     tc->type = JT_DOUBLE;
     return;
   } else if (PyDateTime_Check(obj) || PyDate_Check(obj)) {
@@ -1541,7 +1542,7 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) {
     } else {
       NPY_DATETIMEUNIT base =
           ((PyObjectEncoder *)tc->encoder)->datetimeUnit;
-      GET_TC(tc)->longValue = PyDateTimeToEpoch(obj, base);
+      pc->longValue = PyDateTimeToEpoch(obj, base);
       tc->type = JT_LONG;
     }
     return;
@@ -1573,12 +1574,13 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) {
     } else {
       NPY_DATETIMEUNIT base =
           ((PyObjectEncoder *)tc->encoder)->datetimeUnit;
-      GET_TC(tc)->longValue = PyDateTimeToEpoch(obj, base);
+      pc->longValue = PyDateTimeToEpoch(obj, base);
       tc->type = JT_LONG;
     }
     return;
   } else if (PyDelta_Check(obj)) {
     if (PyObject_HasAttrString(obj, "_value")) {
+      // pd.Timedelta object or pd.NaT
       value = get_long_attr(obj, "_value");
     } else {
       value = total_seconds(obj) * 1000000000LL; // nanoseconds per sec
@@ -1604,11 +1606,11 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) {
 
       tc->type = JT_LONG;
     }
-    GET_TC(tc)->longValue = value;
+    pc->longValue = value;
     return;
   } else if (PyArray_IsScalar(obj, Integer)) {
     tc->type = JT_LONG;
-    PyArray_CastScalarToCtype(obj, &(GET_TC(tc)->longValue),
+    PyArray_CastScalarToCtype(obj, &(pc->longValue),
                               PyArray_DescrFromType(NPY_INT64));
 
     exc = PyErr_Occurred();
@@ -1619,12 +1621,12 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) {
 
     return;
   } else if (PyArray_IsScalar(obj, Bool)) {
-    PyArray_CastScalarToCtype(obj, &(GET_TC(tc)->longValue),
+    PyArray_CastScalarToCtype(obj, &(pc->longValue),
                               PyArray_DescrFromType(NPY_BOOL));
-    tc->type = (GET_TC(tc)->longValue) ? JT_TRUE : JT_FALSE;
+    tc->type = (pc->longValue) ? JT_TRUE : JT_FALSE;
     return;
   } else if (PyArray_IsScalar(obj, Float) || PyArray_IsScalar(obj, Double)) {
-    PyArray_CastScalarToCtype(obj, &(GET_TC(tc)->doubleValue),
+    PyArray_CastScalarToCtype(obj, &(pc->doubleValue),
                               PyArray_DescrFromType(NPY_DOUBLE));
     tc->type = JT_DOUBLE;
     return;
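The comments added above document why the encoder probes for the private `_value` attribute before falling back to generic date handling: pd.Timestamp, pd.Timedelta, and pd.NaT all carry it as a nanosecond-resolution integer, which `get_long_attr` reads directly. A minimal sketch of what those attributes hold, assuming pandas 2.x internals (`_value` is private and may change without notice):

import pandas as pd

# Timestamp._value: nanoseconds since the Unix epoch (private API).
ts = pd.Timestamp("2013-01-10 05:00:00")
print(ts._value)  # 1357794000000000000

# Timedelta._value: the duration in nanoseconds.
td = pd.Timedelta(seconds=1)
print(td._value)  # 1000000000

# pd.NaT exposes _value as well, holding the int64 NaT sentinel.
print(pd.NaT._value)  # -9223372036854775808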
5 changes: 2 additions & 3 deletions pandas/io/excel/_odswriter.py
@@ -2,6 +2,7 @@
 
 from collections import defaultdict
 import datetime
+import json
 from typing import (
     TYPE_CHECKING,
     Any,
@@ -10,8 +11,6 @@
     overload,
 )
 
-from pandas._libs import json
-
 from pandas.io.excel._base import ExcelWriter
 from pandas.io.excel._util import (
     combine_kwargs,
@@ -257,7 +256,7 @@ def _process_style(self, style: dict[str, Any] | None) -> str | None:
 
         if style is None:
             return None
-        style_key = json.ujson_dumps(style)
+        style_key = json.dumps(style)
         if style_key in self._style_dict:
             return self._style_dict[style_key]
         name = f"pd{len(self._style_dict)+1}"
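Both Excel writers serialize a plain dict of built-in types to build a cache key, so nothing ujson-specific is involved and the stdlib encoder is a drop-in replacement; the same swap appears in _xlsxwriter.py below. A rough sketch of the caching idiom, using a hypothetical standalone cache in place of the writer's self._style_dict:

import json

_style_dict: dict[str, str] = {}

def _style_name(style: dict) -> str:
    # Serialize the style dict into a string usable as a dict key.
    style_key = json.dumps(style)
    if style_key in _style_dict:
        return _style_dict[style_key]
    # Mirrors the writer's naming scheme: pd1, pd2, ...
    name = f"pd{len(_style_dict) + 1}"
    _style_dict[style_key] = name
    return name

assert _style_name({"font": {"bold": True}}) == "pd1"
assert _style_name({"font": {"bold": True}}) == "pd1"  # cache hit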
5 changes: 2 additions & 3 deletions pandas/io/excel/_xlsxwriter.py
@@ -1,12 +1,11 @@
 from __future__ import annotations
 
+import json
 from typing import (
     TYPE_CHECKING,
     Any,
 )
 
-from pandas._libs import json
-
 from pandas.io.excel._base import ExcelWriter
 from pandas.io.excel._util import (
     combine_kwargs,
@@ -262,7 +261,7 @@ def _write_cells(
         for cell in cells:
             val, fmt = self._value_with_fmt(cell.val)
 
-            stylekey = json.ujson_dumps(cell.style)
+            stylekey = json.dumps(cell.style)
             if fmt:
                 stylekey += fmt
 
8 changes: 4 additions & 4 deletions pandas/io/json/__init__.py
@@ -1,14 +1,14 @@
 from pandas.io.json._json import (
     read_json,
     to_json,
-    ujson_dumps as dumps,
-    ujson_loads as loads,
+    ujson_dumps,
+    ujson_loads,
 )
 from pandas.io.json._table_schema import build_table_schema
 
 __all__ = [
-    "dumps",
-    "loads",
+    "ujson_dumps",
+    "ujson_loads",
     "read_json",
     "to_json",
     "build_table_schema",
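With the dumps/loads aliases gone, callers import the ujson_-prefixed names directly, which keeps them from shadowing the stdlib's json.dumps and json.loads at the import site. A quick round-trip sketch; pandas.io.json is an internal module, so these names are not a stable public API:

from pandas.io.json import ujson_dumps, ujson_loads

payload = ujson_dumps({"id": 1})  # compact ujson output, e.g. '{"id":1}'
assert ujson_loads(payload) == {"id": 1}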
45 changes: 19 additions & 26 deletions pandas/tests/io/json/test_pandas.py
@@ -28,6 +28,8 @@
     StringArray,
 )
 
+from pandas.io.json import ujson_dumps
+
 
 def test_literal_json_deprecation():
     # PR 53409
@@ -865,14 +867,13 @@ def test_date_index_and_values(self, date_format, as_object, date_typ):
     )
     def test_convert_dates_infer(self, infer_word):
         # GH10747
-        from pandas.io.json import dumps
 
         data = [{"id": 1, infer_word: 1036713600000}, {"id": 2}]
         expected = DataFrame(
             [[1, Timestamp("2002-11-08")], [2, pd.NaT]], columns=["id", infer_word]
         )
 
-        result = read_json(StringIO(dumps(data)))[["id", infer_word]]
+        result = read_json(StringIO(ujson_dumps(data)))[["id", infer_word]]
         tm.assert_frame_equal(result, expected)
 
     @pytest.mark.parametrize(
@@ -1133,8 +1134,6 @@ def test_default_handler(self):
         tm.assert_frame_equal(expected, result, check_index_type=False)
 
     def test_default_handler_indirect(self):
-        from pandas.io.json import dumps
-
         def default(obj):
             if isinstance(obj, complex):
                 return [("mathjs", "Complex"), ("re", obj.real), ("im", obj.imag)]
@@ -1151,7 +1150,9 @@ def default(obj):
             '[9,[[1,null],["STR",null],[[["mathjs","Complex"],'
             '["re",4.0],["im",-5.0]],"N\\/A"]]]'
         )
-        assert dumps(df_list, default_handler=default, orient="values") == expected
+        assert (
+            ujson_dumps(df_list, default_handler=default, orient="values") == expected
+        )
 
     def test_default_handler_numpy_unsupported_dtype(self):
         # GH12554 to_json raises 'Unhandled numpy dtype 15'
@@ -1235,23 +1236,19 @@ def test_sparse(self):
         ],
     )
     def test_tz_is_utc(self, ts):
-        from pandas.io.json import dumps
-
         exp = '"2013-01-10T05:00:00.000Z"'
 
-        assert dumps(ts, iso_dates=True) == exp
+        assert ujson_dumps(ts, iso_dates=True) == exp
         dt = ts.to_pydatetime()
-        assert dumps(dt, iso_dates=True) == exp
+        assert ujson_dumps(dt, iso_dates=True) == exp
 
     def test_tz_is_naive(self):
-        from pandas.io.json import dumps
-
         ts = Timestamp("2013-01-10 05:00:00")
         exp = '"2013-01-10T05:00:00.000"'
 
-        assert dumps(ts, iso_dates=True) == exp
+        assert ujson_dumps(ts, iso_dates=True) == exp
         dt = ts.to_pydatetime()
-        assert dumps(dt, iso_dates=True) == exp
+        assert ujson_dumps(dt, iso_dates=True) == exp
 
     @pytest.mark.parametrize(
         "tz_range",
@@ -1262,42 +1259,38 @@ def test_tz_is_naive(self):
         ],
     )
     def test_tz_range_is_utc(self, tz_range):
-        from pandas.io.json import dumps
-
         exp = '["2013-01-01T05:00:00.000Z","2013-01-02T05:00:00.000Z"]'
         dfexp = (
             '{"DT":{'
             '"0":"2013-01-01T05:00:00.000Z",'
             '"1":"2013-01-02T05:00:00.000Z"}}'
         )
 
-        assert dumps(tz_range, iso_dates=True) == exp
+        assert ujson_dumps(tz_range, iso_dates=True) == exp
         dti = DatetimeIndex(tz_range)
         # Ensure datetimes in object array are serialized correctly
         # in addition to the normal DTI case
-        assert dumps(dti, iso_dates=True) == exp
-        assert dumps(dti.astype(object), iso_dates=True) == exp
+        assert ujson_dumps(dti, iso_dates=True) == exp
+        assert ujson_dumps(dti.astype(object), iso_dates=True) == exp
         df = DataFrame({"DT": dti})
-        result = dumps(df, iso_dates=True)
+        result = ujson_dumps(df, iso_dates=True)
         assert result == dfexp
-        assert dumps(df.astype({"DT": object}), iso_dates=True)
+        assert ujson_dumps(df.astype({"DT": object}), iso_dates=True)
 
     def test_tz_range_is_naive(self):
-        from pandas.io.json import dumps
-
         dti = pd.date_range("2013-01-01 05:00:00", periods=2)
 
         exp = '["2013-01-01T05:00:00.000","2013-01-02T05:00:00.000"]'
         dfexp = '{"DT":{"0":"2013-01-01T05:00:00.000","1":"2013-01-02T05:00:00.000"}}'
 
         # Ensure datetimes in object array are serialized correctly
         # in addition to the normal DTI case
-        assert dumps(dti, iso_dates=True) == exp
-        assert dumps(dti.astype(object), iso_dates=True) == exp
+        assert ujson_dumps(dti, iso_dates=True) == exp
+        assert ujson_dumps(dti.astype(object), iso_dates=True) == exp
         df = DataFrame({"DT": dti})
-        result = dumps(df, iso_dates=True)
+        result = ujson_dumps(df, iso_dates=True)
         assert result == dfexp
-        assert dumps(df.astype({"DT": object}), iso_dates=True)
+        assert ujson_dumps(df.astype({"DT": object}), iso_dates=True)
 
     def test_read_inline_jsonl(self):
         # GH9180
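The assertions in these tests pin down the iso_dates behavior that the rename leaves untouched: tz-aware inputs serialize in UTC with a trailing "Z", while naive inputs carry no timezone designator. A condensed sketch of that contract, reusing the expected strings from the tests above:

from pandas import Timestamp
from pandas.io.json import ujson_dumps

aware = Timestamp("2013-01-10 05:00:00", tz="UTC")
naive = Timestamp("2013-01-10 05:00:00")

# tz-aware timestamps are rendered in UTC with the "Z" designator ...
assert ujson_dumps(aware, iso_dates=True) == '"2013-01-10T05:00:00.000Z"'
# ... while naive timestamps are rendered without one.
assert ujson_dumps(naive, iso_dates=True) == '"2013-01-10T05:00:00.000"'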
