REF: ujson cleanups #54581

Merged 4 commits on Aug 17, 2023
28 changes: 15 additions & 13 deletions pandas/_libs/src/vendored/ujson/python/objToJSON.c
@@ -1318,6 +1318,7 @@ char **NpyArr_encodeLabels(PyArrayObject *labels, PyObjectEncoder *enc,
     } else if (PyDate_Check(item) || PyDelta_Check(item)) {
       is_datetimelike = 1;
       if (PyObject_HasAttrString(item, "_value")) {
+        // pd.Timestamp object or pd.NaT
         // see test_date_index_and_values for case with non-nano
         i8date = get_long_attr(item, "_value");
       } else {
@@ -1471,12 +1472,12 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) {
         }
         // Currently no way to pass longVal to iso function, so use
         // state management
-        GET_TC(tc)->longValue = longVal;
+        pc->longValue = longVal;
         tc->type = JT_UTF8;
       } else {
         NPY_DATETIMEUNIT base =
             ((PyObjectEncoder *)tc->encoder)->datetimeUnit;
-        GET_TC(tc)->longValue = NpyDateTimeToEpoch(longVal, base);
+        pc->longValue = NpyDateTimeToEpoch(longVal, base);
         tc->type = JT_LONG;
       }
     }
@@ -1497,9 +1498,9 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) {
   if (PyLong_Check(obj)) {
     tc->type = JT_LONG;
     int overflow = 0;
-    GET_TC(tc)->longValue = PyLong_AsLongLongAndOverflow(obj, &overflow);
+    pc->longValue = PyLong_AsLongLongAndOverflow(obj, &overflow);
     int err;
-    err = (GET_TC(tc)->longValue == -1) && PyErr_Occurred();
+    err = (pc->longValue == -1) && PyErr_Occurred();

     if (overflow) {
       tc->type = JT_BIGNUM;
@@ -1513,7 +1514,7 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) {
     if (npy_isnan(val) || npy_isinf(val)) {
       tc->type = JT_NULL;
     } else {
-      GET_TC(tc)->doubleValue = val;
+      pc->doubleValue = val;
       tc->type = JT_DOUBLE;
     }
     return;
@@ -1526,7 +1527,7 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) {
     tc->type = JT_UTF8;
     return;
   } else if (object_is_decimal_type(obj)) {
-    GET_TC(tc)->doubleValue = PyFloat_AsDouble(obj);
+    pc->doubleValue = PyFloat_AsDouble(obj);
     tc->type = JT_DOUBLE;
     return;
   } else if (PyDateTime_Check(obj) || PyDate_Check(obj)) {
@@ -1541,7 +1542,7 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) {
     } else {
       NPY_DATETIMEUNIT base =
           ((PyObjectEncoder *)tc->encoder)->datetimeUnit;
-      GET_TC(tc)->longValue = PyDateTimeToEpoch(obj, base);
+      pc->longValue = PyDateTimeToEpoch(obj, base);
       tc->type = JT_LONG;
     }
     return;
@@ -1573,12 +1574,13 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) {
     } else {
       NPY_DATETIMEUNIT base =
           ((PyObjectEncoder *)tc->encoder)->datetimeUnit;
-      GET_TC(tc)->longValue = PyDateTimeToEpoch(obj, base);
+      pc->longValue = PyDateTimeToEpoch(obj, base);
       tc->type = JT_LONG;
     }
     return;
   } else if (PyDelta_Check(obj)) {
     if (PyObject_HasAttrString(obj, "_value")) {
+      // pd.Timedelta object or pd.NaT
       value = get_long_attr(obj, "_value");
     } else {
       value = total_seconds(obj) * 1000000000LL; // nanoseconds per sec
@@ -1604,11 +1606,11 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) {

       tc->type = JT_LONG;
     }
-    GET_TC(tc)->longValue = value;
+    pc->longValue = value;
     return;
   } else if (PyArray_IsScalar(obj, Integer)) {
     tc->type = JT_LONG;
-    PyArray_CastScalarToCtype(obj, &(GET_TC(tc)->longValue),
+    PyArray_CastScalarToCtype(obj, &(pc->longValue),
                               PyArray_DescrFromType(NPY_INT64));

     exc = PyErr_Occurred();
@@ -1619,12 +1621,12 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) {

     return;
   } else if (PyArray_IsScalar(obj, Bool)) {
-    PyArray_CastScalarToCtype(obj, &(GET_TC(tc)->longValue),
+    PyArray_CastScalarToCtype(obj, &(pc->longValue),
                               PyArray_DescrFromType(NPY_BOOL));
-    tc->type = (GET_TC(tc)->longValue) ? JT_TRUE : JT_FALSE;
+    tc->type = (pc->longValue) ? JT_TRUE : JT_FALSE;
     return;
   } else if (PyArray_IsScalar(obj, Float) || PyArray_IsScalar(obj, Double)) {
-    PyArray_CastScalarToCtype(obj, &(GET_TC(tc)->doubleValue),
+    PyArray_CastScalarToCtype(obj, &(pc->doubleValue),
                               PyArray_DescrFromType(NPY_DOUBLE));
     tc->type = JT_DOUBLE;
     return;
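The objToJSON.c changes are two small cleanups: Object_beginTypeContext already holds the per-object TypeContext in a local pc, so writing pc->longValue directly drops repeated GET_TC(tc) lookups of the same pointer, and the new comments record which Python objects carry the private _value attribute the encoder branches on. That branch is easy to see from the Python side; a minimal sketch, illustrative only since _value is internal pandas API (epoch nanoseconds on datetime-like scalars):

import datetime

import pandas as pd

# Objects with `_value` take the branch that reads nanoseconds directly.
print(hasattr(pd.Timestamp("2013-01-01"), "_value"))  # True
print(hasattr(pd.NaT, "_value"))                      # True (hence "or pd.NaT")
print(hasattr(pd.Timedelta("1 day"), "_value"))       # True
# Stdlib objects lack it, so the encoder computes an epoch value itself.
print(hasattr(datetime.date(2013, 1, 1), "_value"))   # False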
5 changes: 2 additions & 3 deletions pandas/io/excel/_odswriter.py
@@ -2,6 +2,7 @@

 from collections import defaultdict
 import datetime
+import json
 from typing import (
     TYPE_CHECKING,
     Any,
@@ -10,8 +11,6 @@
     overload,
 )

-from pandas._libs import json
-
 from pandas.io.excel._base import ExcelWriter
 from pandas.io.excel._util import (
     combine_kwargs,
@@ -257,7 +256,7 @@ def _process_style(self, style: dict[str, Any] | None) -> str | None:

         if style is None:
             return None
-        style_key = json.ujson_dumps(style)
+        style_key = json.dumps(style)
         if style_key in self._style_dict:
             return self._style_dict[style_key]
         name = f"pd{len(self._style_dict)+1}"
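In _process_style the serialized style dict is only an internal cache key, which is why the stdlib encoder can replace the vendored one here. A standalone sketch of the caching pattern, with names local to the sketch rather than pandas API:

import json

_style_cache: dict[str, str] = {}

def style_name(style: dict) -> str:
    # Serialize the style dict into a hashable, deterministic key.
    key = json.dumps(style)
    if key in _style_cache:
        return _style_cache[key]
    # First sighting of this style: mint a new name, mirroring pd{n}.
    name = f"pd{len(_style_cache) + 1}"
    _style_cache[key] = name
    return name

assert style_name({"font": {"bold": True}}) == "pd1"
assert style_name({"font": {"bold": True}}) == "pd1"  # cache hit
assert style_name({"font": {"italic": True}}) == "pd2"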
5 changes: 2 additions & 3 deletions pandas/io/excel/_xlsxwriter.py
@@ -1,12 +1,11 @@
 from __future__ import annotations

+import json
 from typing import (
     TYPE_CHECKING,
     Any,
 )

-from pandas._libs import json
-
 from pandas.io.excel._base import ExcelWriter
 from pandas.io.excel._util import (
     combine_kwargs,
@@ -262,7 +261,7 @@ def _write_cells(
         for cell in cells:
             val, fmt = self._value_with_fmt(cell.val)

-            stylekey = json.ujson_dumps(cell.style)
+            stylekey = json.dumps(cell.style)
             if fmt:
                 stylekey += fmt
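Same swap as in the ODS writer, with one observation about it (mine, not something the PR states): stdlib json and ujson do not format output identically, but the key never leaves the process, so only within-run determinism matters. For example:

import json

a = {"num_format": "0.00", "bold": True}
b = {"num_format": "0.00", "bold": True}
# Equal dicts built in the same key order serialize to identical keys.
assert json.dumps(a) == json.dumps(b)

# A different insertion order yields a different key. The worst case is a
# duplicate format object registered in the workbook, never a wrong style.
c = {"bold": True, "num_format": "0.00"}
assert json.dumps(a) != json.dumps(c)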
8 changes: 4 additions & 4 deletions pandas/io/json/__init__.py
@@ -1,14 +1,14 @@
 from pandas.io.json._json import (
     read_json,
     to_json,
-    ujson_dumps as dumps,
-    ujson_loads as loads,
+    ujson_dumps,
+    ujson_loads,
 )
 from pandas.io.json._table_schema import build_table_schema

 __all__ = [
-    "dumps",
-    "loads",
+    "ujson_dumps",
+    "ujson_loads",
     "read_json",
     "to_json",
     "build_table_schema",
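Dropping the "as dumps" / "as loads" aliases makes call sites say explicitly that these helpers are the vendored ujson, not the stdlib module of the same name. A minimal round trip with the renamed exports:

from pandas.io.json import ujson_dumps, ujson_loads

payload = ujson_dumps({"a": [1, 2, 3]})
assert ujson_loads(payload) == {"a": [1, 2, 3]}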
45 changes: 19 additions & 26 deletions pandas/tests/io/json/test_pandas.py
@@ -28,6 +28,8 @@
     StringArray,
 )

+from pandas.io.json import ujson_dumps
+

 def test_literal_json_deprecation():
     # PR 53409
@@ -865,14 +867,13 @@ def test_date_index_and_values(self, date_format, as_object, date_typ):
     )
     def test_convert_dates_infer(self, infer_word):
         # GH10747
-        from pandas.io.json import dumps

         data = [{"id": 1, infer_word: 1036713600000}, {"id": 2}]
         expected = DataFrame(
             [[1, Timestamp("2002-11-08")], [2, pd.NaT]], columns=["id", infer_word]
         )

-        result = read_json(StringIO(dumps(data)))[["id", infer_word]]
+        result = read_json(StringIO(ujson_dumps(data)))[["id", infer_word]]
         tm.assert_frame_equal(result, expected)

     @pytest.mark.parametrize(
@@ -1133,8 +1134,6 @@ def test_default_handler(self):
         tm.assert_frame_equal(expected, result, check_index_type=False)

     def test_default_handler_indirect(self):
-        from pandas.io.json import dumps
-
         def default(obj):
             if isinstance(obj, complex):
                 return [("mathjs", "Complex"), ("re", obj.real), ("im", obj.imag)]
@@ -1151,7 +1150,9 @@ def default(obj):
             '[9,[[1,null],["STR",null],[[["mathjs","Complex"],'
             '["re",4.0],["im",-5.0]],"N\\/A"]]]'
         )
-        assert dumps(df_list, default_handler=default, orient="values") == expected
+        assert (
+            ujson_dumps(df_list, default_handler=default, orient="values") == expected
+        )

     def test_default_handler_numpy_unsupported_dtype(self):
         # GH12554 to_json raises 'Unhandled numpy dtype 15'
@@ -1235,23 +1236,19 @@ def test_sparse(self):
         ],
     )
     def test_tz_is_utc(self, ts):
-        from pandas.io.json import dumps
-
         exp = '"2013-01-10T05:00:00.000Z"'

-        assert dumps(ts, iso_dates=True) == exp
+        assert ujson_dumps(ts, iso_dates=True) == exp
         dt = ts.to_pydatetime()
-        assert dumps(dt, iso_dates=True) == exp
+        assert ujson_dumps(dt, iso_dates=True) == exp

     def test_tz_is_naive(self):
-        from pandas.io.json import dumps
-
         ts = Timestamp("2013-01-10 05:00:00")
         exp = '"2013-01-10T05:00:00.000"'

-        assert dumps(ts, iso_dates=True) == exp
+        assert ujson_dumps(ts, iso_dates=True) == exp
         dt = ts.to_pydatetime()
-        assert dumps(dt, iso_dates=True) == exp
+        assert ujson_dumps(dt, iso_dates=True) == exp

     @pytest.mark.parametrize(
         "tz_range",
@@ -1262,42 +1259,38 @@ def test_tz_is_naive(self):
         ],
     )
     def test_tz_range_is_utc(self, tz_range):
-        from pandas.io.json import dumps
-
         exp = '["2013-01-01T05:00:00.000Z","2013-01-02T05:00:00.000Z"]'
         dfexp = (
             '{"DT":{'
             '"0":"2013-01-01T05:00:00.000Z",'
             '"1":"2013-01-02T05:00:00.000Z"}}'
         )

-        assert dumps(tz_range, iso_dates=True) == exp
+        assert ujson_dumps(tz_range, iso_dates=True) == exp
         dti = DatetimeIndex(tz_range)
         # Ensure datetimes in object array are serialized correctly
         # in addition to the normal DTI case
-        assert dumps(dti, iso_dates=True) == exp
-        assert dumps(dti.astype(object), iso_dates=True) == exp
+        assert ujson_dumps(dti, iso_dates=True) == exp
+        assert ujson_dumps(dti.astype(object), iso_dates=True) == exp
         df = DataFrame({"DT": dti})
-        result = dumps(df, iso_dates=True)
+        result = ujson_dumps(df, iso_dates=True)
         assert result == dfexp
-        assert dumps(df.astype({"DT": object}), iso_dates=True)
+        assert ujson_dumps(df.astype({"DT": object}), iso_dates=True)

     def test_tz_range_is_naive(self):
-        from pandas.io.json import dumps
-
         dti = pd.date_range("2013-01-01 05:00:00", periods=2)

         exp = '["2013-01-01T05:00:00.000","2013-01-02T05:00:00.000"]'
         dfexp = '{"DT":{"0":"2013-01-01T05:00:00.000","1":"2013-01-02T05:00:00.000"}}'

         # Ensure datetimes in object array are serialized correctly
         # in addition to the normal DTI case
-        assert dumps(dti, iso_dates=True) == exp
-        assert dumps(dti.astype(object), iso_dates=True) == exp
+        assert ujson_dumps(dti, iso_dates=True) == exp
+        assert ujson_dumps(dti.astype(object), iso_dates=True) == exp
         df = DataFrame({"DT": dti})
-        result = dumps(df, iso_dates=True)
+        result = ujson_dumps(df, iso_dates=True)
         assert result == dfexp
-        assert dumps(df.astype({"DT": object}), iso_dates=True)
+        assert ujson_dumps(df.astype({"DT": object}), iso_dates=True)

     def test_read_inline_jsonl(self):
         # GH9180