REF: ujson cleanups (#54581)
* REF: use stdlib json

* remove unnecessary GET_TC calls

* REF: update dumps->ujson_dumps

* revert enum move
jbrockmendel authored Aug 17, 2023
1 parent 7c9ba89 commit 3b34c3b
Showing 5 changed files with 42 additions and 49 deletions.
28 changes: 15 additions & 13 deletions pandas/_libs/src/vendored/ujson/python/objToJSON.c
@@ -1318,6 +1318,7 @@ char **NpyArr_encodeLabels(PyArrayObject *labels, PyObjectEncoder *enc,
     } else if (PyDate_Check(item) || PyDelta_Check(item)) {
       is_datetimelike = 1;
       if (PyObject_HasAttrString(item, "_value")) {
+        // pd.Timestamp object or pd.NaT
         // see test_date_index_and_values for case with non-nano
         i8date = get_long_attr(item, "_value");
       } else {
@@ -1471,12 +1472,12 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) {
        }
        // Currently no way to pass longVal to iso function, so use
        // state management
-       GET_TC(tc)->longValue = longVal;
+       pc->longValue = longVal;
        tc->type = JT_UTF8;
      } else {
        NPY_DATETIMEUNIT base =
            ((PyObjectEncoder *)tc->encoder)->datetimeUnit;
-       GET_TC(tc)->longValue = NpyDateTimeToEpoch(longVal, base);
+       pc->longValue = NpyDateTimeToEpoch(longVal, base);
        tc->type = JT_LONG;
      }
    }
@@ -1497,9 +1498,9 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) {
   if (PyLong_Check(obj)) {
     tc->type = JT_LONG;
     int overflow = 0;
-    GET_TC(tc)->longValue = PyLong_AsLongLongAndOverflow(obj, &overflow);
+    pc->longValue = PyLong_AsLongLongAndOverflow(obj, &overflow);
     int err;
-    err = (GET_TC(tc)->longValue == -1) && PyErr_Occurred();
+    err = (pc->longValue == -1) && PyErr_Occurred();
 
     if (overflow) {
       tc->type = JT_BIGNUM;
@@ -1513,7 +1514,7 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) {
     if (npy_isnan(val) || npy_isinf(val)) {
       tc->type = JT_NULL;
     } else {
-      GET_TC(tc)->doubleValue = val;
+      pc->doubleValue = val;
       tc->type = JT_DOUBLE;
     }
     return;
@@ -1526,7 +1527,7 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) {
     tc->type = JT_UTF8;
     return;
   } else if (object_is_decimal_type(obj)) {
-    GET_TC(tc)->doubleValue = PyFloat_AsDouble(obj);
+    pc->doubleValue = PyFloat_AsDouble(obj);
     tc->type = JT_DOUBLE;
     return;
   } else if (PyDateTime_Check(obj) || PyDate_Check(obj)) {
@@ -1541,7 +1542,7 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) {
     } else {
       NPY_DATETIMEUNIT base =
           ((PyObjectEncoder *)tc->encoder)->datetimeUnit;
-      GET_TC(tc)->longValue = PyDateTimeToEpoch(obj, base);
+      pc->longValue = PyDateTimeToEpoch(obj, base);
       tc->type = JT_LONG;
     }
     return;
@@ -1573,12 +1574,13 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) {
     } else {
       NPY_DATETIMEUNIT base =
           ((PyObjectEncoder *)tc->encoder)->datetimeUnit;
-      GET_TC(tc)->longValue = PyDateTimeToEpoch(obj, base);
+      pc->longValue = PyDateTimeToEpoch(obj, base);
       tc->type = JT_LONG;
     }
     return;
   } else if (PyDelta_Check(obj)) {
     if (PyObject_HasAttrString(obj, "_value")) {
+      // pd.Timedelta object or pd.NaT
       value = get_long_attr(obj, "_value");
     } else {
       value = total_seconds(obj) * 1000000000LL; // nanoseconds per sec
@@ -1604,11 +1606,11 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) {
 
       tc->type = JT_LONG;
     }
-    GET_TC(tc)->longValue = value;
+    pc->longValue = value;
     return;
   } else if (PyArray_IsScalar(obj, Integer)) {
     tc->type = JT_LONG;
-    PyArray_CastScalarToCtype(obj, &(GET_TC(tc)->longValue),
+    PyArray_CastScalarToCtype(obj, &(pc->longValue),
                               PyArray_DescrFromType(NPY_INT64));
 
     exc = PyErr_Occurred();
@@ -1619,12 +1621,12 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) {
 
     return;
   } else if (PyArray_IsScalar(obj, Bool)) {
-    PyArray_CastScalarToCtype(obj, &(GET_TC(tc)->longValue),
+    PyArray_CastScalarToCtype(obj, &(pc->longValue),
                               PyArray_DescrFromType(NPY_BOOL));
-    tc->type = (GET_TC(tc)->longValue) ? JT_TRUE : JT_FALSE;
+    tc->type = (pc->longValue) ? JT_TRUE : JT_FALSE;
     return;
   } else if (PyArray_IsScalar(obj, Float) || PyArray_IsScalar(obj, Double)) {
-    PyArray_CastScalarToCtype(obj, &(GET_TC(tc)->doubleValue),
+    PyArray_CastScalarToCtype(obj, &(pc->doubleValue),
                               PyArray_DescrFromType(NPY_DOUBLE));
     tc->type = JT_DOUBLE;
     return;
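The comments added above document why the encoder probes for the private `_value` attribute before falling back to generic date handling: pd.Timestamp, pd.Timedelta, and pd.NaT all carry it as a nanosecond-resolution integer, which `get_long_attr` reads directly. A minimal sketch of what those attributes hold, assuming pandas 2.x internals (`_value` is private and may change without notice):

import pandas as pd

# Timestamp._value: nanoseconds since the Unix epoch (private API).
ts = pd.Timestamp("2013-01-10 05:00:00")
print(ts._value)  # 1357794000000000000

# Timedelta._value: the duration in nanoseconds.
td = pd.Timedelta(seconds=1)
print(td._value)  # 1000000000

# pd.NaT exposes _value as well, holding the int64 NaT sentinel.
print(pd.NaT._value)  # -9223372036854775808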
5 changes: 2 additions & 3 deletions pandas/io/excel/_odswriter.py
@@ -2,6 +2,7 @@
 
 from collections import defaultdict
 import datetime
+import json
 from typing import (
     TYPE_CHECKING,
     Any,
@@ -10,8 +11,6 @@
     overload,
 )
 
-from pandas._libs import json
-
 from pandas.io.excel._base import ExcelWriter
 from pandas.io.excel._util import (
     combine_kwargs,
@@ -257,7 +256,7 @@ def _process_style(self, style: dict[str, Any] | None) -> str | None:
 
         if style is None:
             return None
-        style_key = json.ujson_dumps(style)
+        style_key = json.dumps(style)
         if style_key in self._style_dict:
             return self._style_dict[style_key]
         name = f"pd{len(self._style_dict)+1}"
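Both Excel writers serialize a plain dict of built-in types to build a cache key, so nothing ujson-specific is involved and the stdlib encoder is a drop-in replacement; the same swap appears in _xlsxwriter.py below. A rough sketch of the caching idiom, using a hypothetical standalone cache in place of the writer's self._style_dict:

import json

_style_dict: dict[str, str] = {}

def _style_name(style: dict) -> str:
    # Serialize the style dict into a string usable as a dict key.
    style_key = json.dumps(style)
    if style_key in _style_dict:
        return _style_dict[style_key]
    # Mirrors the writer's naming scheme: pd1, pd2, ...
    name = f"pd{len(_style_dict) + 1}"
    _style_dict[style_key] = name
    return name

assert _style_name({"font": {"bold": True}}) == "pd1"
assert _style_name({"font": {"bold": True}}) == "pd1"  # cache hit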
5 changes: 2 additions & 3 deletions pandas/io/excel/_xlsxwriter.py
@@ -1,12 +1,11 @@
 from __future__ import annotations
 
+import json
 from typing import (
     TYPE_CHECKING,
     Any,
 )
 
-from pandas._libs import json
-
 from pandas.io.excel._base import ExcelWriter
 from pandas.io.excel._util import (
     combine_kwargs,
@@ -262,7 +261,7 @@ def _write_cells(
         for cell in cells:
             val, fmt = self._value_with_fmt(cell.val)
 
-            stylekey = json.ujson_dumps(cell.style)
+            stylekey = json.dumps(cell.style)
             if fmt:
                 stylekey += fmt
 
8 changes: 4 additions & 4 deletions pandas/io/json/__init__.py
@@ -1,14 +1,14 @@
 from pandas.io.json._json import (
     read_json,
     to_json,
-    ujson_dumps as dumps,
-    ujson_loads as loads,
+    ujson_dumps,
+    ujson_loads,
 )
 from pandas.io.json._table_schema import build_table_schema
 
 __all__ = [
-    "dumps",
-    "loads",
+    "ujson_dumps",
+    "ujson_loads",
     "read_json",
     "to_json",
     "build_table_schema",
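With the dumps/loads aliases gone, callers import the ujson_-prefixed names directly, which keeps them from shadowing the stdlib's json.dumps and json.loads at the import site. A quick round-trip sketch; pandas.io.json is an internal module, so these names are not a stable public API:

from pandas.io.json import ujson_dumps, ujson_loads

payload = ujson_dumps({"id": 1})  # compact ujson output, e.g. '{"id":1}'
assert ujson_loads(payload) == {"id": 1}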
45 changes: 19 additions & 26 deletions pandas/tests/io/json/test_pandas.py
@@ -28,6 +28,8 @@
     StringArray,
 )
 
+from pandas.io.json import ujson_dumps
+
 
 def test_literal_json_deprecation():
     # PR 53409
@@ -865,14 +867,13 @@ def test_date_index_and_values(self, date_format, as_object, date_typ):
     )
     def test_convert_dates_infer(self, infer_word):
         # GH10747
-        from pandas.io.json import dumps
 
         data = [{"id": 1, infer_word: 1036713600000}, {"id": 2}]
         expected = DataFrame(
             [[1, Timestamp("2002-11-08")], [2, pd.NaT]], columns=["id", infer_word]
         )
 
-        result = read_json(StringIO(dumps(data)))[["id", infer_word]]
+        result = read_json(StringIO(ujson_dumps(data)))[["id", infer_word]]
         tm.assert_frame_equal(result, expected)
 
     @pytest.mark.parametrize(
@@ -1133,8 +1134,6 @@ def test_default_handler(self):
         tm.assert_frame_equal(expected, result, check_index_type=False)
 
     def test_default_handler_indirect(self):
-        from pandas.io.json import dumps
-
         def default(obj):
             if isinstance(obj, complex):
                 return [("mathjs", "Complex"), ("re", obj.real), ("im", obj.imag)]
@@ -1151,7 +1150,9 @@ def default(obj):
             '[9,[[1,null],["STR",null],[[["mathjs","Complex"],'
             '["re",4.0],["im",-5.0]],"N\\/A"]]]'
         )
-        assert dumps(df_list, default_handler=default, orient="values") == expected
+        assert (
+            ujson_dumps(df_list, default_handler=default, orient="values") == expected
+        )
 
     def test_default_handler_numpy_unsupported_dtype(self):
         # GH12554 to_json raises 'Unhandled numpy dtype 15'
@@ -1235,23 +1236,19 @@ def test_sparse(self):
         ],
     )
     def test_tz_is_utc(self, ts):
-        from pandas.io.json import dumps
-
         exp = '"2013-01-10T05:00:00.000Z"'
 
-        assert dumps(ts, iso_dates=True) == exp
+        assert ujson_dumps(ts, iso_dates=True) == exp
         dt = ts.to_pydatetime()
-        assert dumps(dt, iso_dates=True) == exp
+        assert ujson_dumps(dt, iso_dates=True) == exp
 
     def test_tz_is_naive(self):
-        from pandas.io.json import dumps
-
         ts = Timestamp("2013-01-10 05:00:00")
         exp = '"2013-01-10T05:00:00.000"'
 
-        assert dumps(ts, iso_dates=True) == exp
+        assert ujson_dumps(ts, iso_dates=True) == exp
         dt = ts.to_pydatetime()
-        assert dumps(dt, iso_dates=True) == exp
+        assert ujson_dumps(dt, iso_dates=True) == exp
 
     @pytest.mark.parametrize(
         "tz_range",
@@ -1262,42 +1259,38 @@ def test_tz_is_naive(self):
         ],
     )
     def test_tz_range_is_utc(self, tz_range):
-        from pandas.io.json import dumps
-
         exp = '["2013-01-01T05:00:00.000Z","2013-01-02T05:00:00.000Z"]'
         dfexp = (
             '{"DT":{'
             '"0":"2013-01-01T05:00:00.000Z",'
             '"1":"2013-01-02T05:00:00.000Z"}}'
         )
 
-        assert dumps(tz_range, iso_dates=True) == exp
+        assert ujson_dumps(tz_range, iso_dates=True) == exp
         dti = DatetimeIndex(tz_range)
         # Ensure datetimes in object array are serialized correctly
         # in addition to the normal DTI case
-        assert dumps(dti, iso_dates=True) == exp
-        assert dumps(dti.astype(object), iso_dates=True) == exp
+        assert ujson_dumps(dti, iso_dates=True) == exp
+        assert ujson_dumps(dti.astype(object), iso_dates=True) == exp
         df = DataFrame({"DT": dti})
-        result = dumps(df, iso_dates=True)
+        result = ujson_dumps(df, iso_dates=True)
         assert result == dfexp
-        assert dumps(df.astype({"DT": object}), iso_dates=True)
+        assert ujson_dumps(df.astype({"DT": object}), iso_dates=True)
 
     def test_tz_range_is_naive(self):
-        from pandas.io.json import dumps
-
         dti = pd.date_range("2013-01-01 05:00:00", periods=2)
 
         exp = '["2013-01-01T05:00:00.000","2013-01-02T05:00:00.000"]'
         dfexp = '{"DT":{"0":"2013-01-01T05:00:00.000","1":"2013-01-02T05:00:00.000"}}'
 
         # Ensure datetimes in object array are serialized correctly
         # in addition to the normal DTI case
-        assert dumps(dti, iso_dates=True) == exp
-        assert dumps(dti.astype(object), iso_dates=True) == exp
+        assert ujson_dumps(dti, iso_dates=True) == exp
+        assert ujson_dumps(dti.astype(object), iso_dates=True) == exp
         df = DataFrame({"DT": dti})
-        result = dumps(df, iso_dates=True)
+        result = ujson_dumps(df, iso_dates=True)
         assert result == dfexp
-        assert dumps(df.astype({"DT": object}), iso_dates=True)
+        assert ujson_dumps(df.astype({"DT": object}), iso_dates=True)
 
     def test_read_inline_jsonl(self):
         # GH9180
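The assertions in these tests pin down the iso_dates behavior that the rename leaves untouched: tz-aware inputs serialize in UTC with a trailing "Z", while naive inputs carry no timezone designator. A condensed sketch of that contract, reusing the expected strings from the tests above:

from pandas import Timestamp
from pandas.io.json import ujson_dumps

aware = Timestamp("2013-01-10 05:00:00", tz="UTC")
naive = Timestamp("2013-01-10 05:00:00")

# tz-aware timestamps are rendered in UTC with the "Z" designator ...
assert ujson_dumps(aware, iso_dates=True) == '"2013-01-10T05:00:00.000Z"'
# ... while naive timestamps are rendered without one.
assert ujson_dumps(naive, iso_dates=True) == '"2013-01-10T05:00:00.000"'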
