Skip to content

Commit 0fc795a

Browse files
authored
feat: add bigframes.bigquery.to_json (#2078)
1 parent 090ce8e commit 0fc795a

File tree

6 files changed

+87
-0
lines changed

6 files changed

+87
-0
lines changed

bigframes/bigquery/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@
5151
json_value,
5252
json_value_array,
5353
parse_json,
54+
to_json,
5455
to_json_string,
5556
)
5657
from bigframes.bigquery._operations.search import create_vector_index, vector_search
@@ -89,6 +90,7 @@
8990
json_value,
9091
json_value_array,
9192
parse_json,
93+
to_json,
9294
to_json_string,
9395
# search ops
9496
create_vector_index,

bigframes/bigquery/_operations/json.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -430,6 +430,40 @@ def json_value_array(
430430
return input._apply_unary_op(ops.JSONValueArray(json_path=json_path))
431431

432432

433+
def to_json(
    input: series.Series,
) -> series.Series:
    """Converts each value of a Series to a JSON value.

    Unlike ``to_json_string``, the result is a Series of the JSON dtype
    rather than a Series of JSON-formatted strings.

    **Examples:**

        >>> import bigframes.pandas as bpd
        >>> import bigframes.bigquery as bbq
        >>> bpd.options.display.progress_bar = None

        >>> s = bpd.Series([1, 2, 3])
        >>> bbq.to_json(s)
        0    1
        1    2
        2    3
        dtype: extension<dbjson<JSONArrowType>>[pyarrow]

        >>> s = bpd.Series([{"int": 1, "str": "pandas"}, {"int": 2, "str": "numpy"}])
        >>> bbq.to_json(s)
        0    {"int":1,"str":"pandas"}
        1     {"int":2,"str":"numpy"}
        dtype: extension<dbjson<JSONArrowType>>[pyarrow]

    Args:
        input (bigframes.series.Series):
            The Series whose values should be converted. Its dtype must be
            one that can be encoded as JSON (for example integers or
            structs, as in the examples above).

    Returns:
        bigframes.series.Series: A new Series of JSON values.

    Raises:
        TypeError: If the dtype of ``input`` cannot be encoded as JSON
            (raised by the ``ToJSON`` op's output type check).
    """
    return input._apply_unary_op(ops.ToJSON())
465+
466+
433467
def to_json_string(
434468
input: series.Series,
435469
) -> series.Series:

bigframes/core/compile/ibis_compiler/scalar_op_registry.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1302,6 +1302,11 @@ def parse_json_op_impl(x: ibis_types.Value, op: ops.ParseJSON):
13021302
return parse_json(json_str=x)
13031303

13041304

1305+
@scalar_op_compiler.register_unary_op(ops.ToJSON)
def to_json_op_impl(json_obj: ibis_types.Value):
    """Compile ``ops.ToJSON`` by delegating to the ``to_json`` builtin."""
    converted = to_json(json_obj=json_obj)
    return converted
1308+
1309+
13051310
@scalar_op_compiler.register_unary_op(ops.ToJSONString)
13061311
def to_json_string_op_impl(x: ibis_types.Value):
13071312
return to_json_string(value=x)
@@ -2093,6 +2098,11 @@ def json_extract_string_array( # type: ignore[empty-body]
20932098
"""Extracts a JSON array and converts it to a SQL ARRAY of STRINGs."""
20942099

20952100

2101+
@ibis_udf.scalar.builtin(name="to_json")
def to_json(json_obj) -> ibis_dtypes.JSON:  # type: ignore[empty-body]
    """Convert a value to JSON.

    Declaration-only stub for the builtin scalar function named ``to_json``:
    it takes a single value and is typed as returning JSON. The body is
    intentionally empty (hence the ``empty-body`` type-ignore).

    NOTE(review): despite the parameter name, ``json_obj`` appears to be the
    value being converted rather than an existing JSON object; callers in
    this change pass integer and struct values. Confirm before renaming.
    """
2104+
2105+
20962106
@ibis_udf.scalar.builtin(name="to_json_string")
20972107
def to_json_string(value) -> ibis_dtypes.String: # type: ignore[empty-body]
20982108
"""Convert value to JSON-formatted string."""

bigframes/operations/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,7 @@
124124
JSONValue,
125125
JSONValueArray,
126126
ParseJSON,
127+
ToJSON,
127128
ToJSONString,
128129
)
129130
from bigframes.operations.numeric_ops import (
@@ -376,6 +377,7 @@
376377
"JSONValue",
377378
"JSONValueArray",
378379
"ParseJSON",
380+
"ToJSON",
379381
"ToJSONString",
380382
# Bool ops
381383
"and_op",

bigframes/operations/json_ops.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,20 @@ def output_type(self, *input_types):
102102
return dtypes.JSON_DTYPE
103103

104104

105+
@dataclasses.dataclass(frozen=True)
class ToJSON(base_ops.UnaryOp):
    """Unary op that converts a JSON-encodable value into a JSON value."""

    name: typing.ClassVar[str] = "to_json"

    def output_type(self, *input_types):
        """Validate the input dtype and return the JSON dtype.

        Args:
            *input_types: Dtypes of the op's inputs; only the first is used.

        Returns:
            ``dtypes.JSON_DTYPE``.

        Raises:
            TypeError: If the first input dtype cannot be encoded as JSON.
        """
        input_type = input_types[0]
        if not dtypes.is_json_encoding_type(input_type):
            # Trailing space keeps the two sentences apart in the rendered
            # message (previously "...as JSON.Received type: ...").
            raise TypeError(
                "The value to be assigned must be a type that can be encoded as JSON. "
                f"Received type: {input_type}"
            )
        return dtypes.JSON_DTYPE
117+
118+
105119
@dataclasses.dataclass(frozen=True)
106120
class ToJSONString(base_ops.UnaryOp):
107121
name: typing.ClassVar[str] = "to_json_string"

tests/system/small/bigquery/test_json.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -386,6 +386,31 @@ def test_parse_json_w_invalid_series_type():
386386
bbq.parse_json(s)
387387

388388

389+
def test_to_json_from_int():
    """to_json on a numeric Series containing a missing value."""
    # Per the expected values below, the missing entry becomes JSON null and
    # the numbers come back in float form ("1.0"); presumably the None
    # promotes the Series to a float dtype -- confirm if this ever changes.
    numbers = bpd.Series([1, 2, None, 3])
    result = bbq.to_json(numbers)
    want = bpd.Series(["1.0", "2.0", "null", "3.0"], dtype=dtypes.JSON_DTYPE)
    pd.testing.assert_series_equal(result.to_pandas(), want.to_pandas())
394+
395+
396+
def test_to_json_from_struct():
    """to_json on a struct-typed Series yields one JSON object per row."""
    records = [
        {"version": 1, "project": "pandas"},
        {"version": 2, "project": "numpy"},
    ]
    s = bpd.Series(records)
    # Guard the precondition: the input must really be struct-like.
    assert dtypes.is_struct_like(s.dtype)

    result = bbq.to_json(s)
    want_json = [
        '{"project":"pandas","version":1}',
        '{"project":"numpy","version":2}',
    ]
    want = bpd.Series(want_json, dtype=dtypes.JSON_DTYPE)

    pd.testing.assert_series_equal(result.to_pandas(), want.to_pandas())
412+
413+
389414
def test_to_json_string_from_int():
390415
s = bpd.Series([1, 2, None, 3])
391416
actual = bbq.to_json_string(s)

0 commit comments

Comments
 (0)