Skip to content

Commit d011e7b

Browse files
committed
fix(trino): convert STRUCT/ROW/ARRAY types to valid JSON strings
1 parent 358138f commit d011e7b

File tree

3 files changed

+233
-0
lines changed

3 files changed

+233
-0
lines changed

deepnote_toolkit/ocelots/pandas/utils.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
import json
2+
13
import numpy as np
24
import pandas as pd
35
from packaging.requirements import Requirement
@@ -11,8 +13,23 @@ def safe_convert_to_string(value):
1113
1214
Note: For bytes, this returns Python's standard string representation (e.g., b'hello')
1315
rather than base64 encoding, which is more human-readable.
16+
17+
For dicts, lists, and tuples, this returns valid JSON using json.dumps() rather than str().
18+
This is critical for databases like Trino that return structured types (STRUCT/ROW/ARRAY)
19+
as Python objects (NamedRowTuple, dict, list) instead of strings. Using str() on these
20+
would produce invalid JSON with single quotes like "{'a': 'x'}" instead of valid JSON
21+
like '{"a": "x"}', causing frontend rendering to fail.
22+
23+
Note: PostgreSQL returns ROW types as plain strings, so this conversion isn't needed for
24+
them, but it doesn't hurt since str(string) returns the same string.
1425
"""
1526
try:
27+
# Convert collection types to valid JSON strings for proper frontend rendering.
28+
# Databases like Trino return structured types as Python objects (e.g. NamedRowTuple),
29+
# while PostgreSQL returns them as strings. Using json.dumps() ensures valid JSON
30+
# with double quotes, which the frontend can parse correctly.
31+
if isinstance(value, (dict, list, tuple)):
32+
return json.dumps(value)
1633
return str(value)
1734
except Exception:
1835
return "<unconvertible>"

tests/integration/test_trino.py

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -225,3 +225,105 @@ def test_execute_sql_with_autodetection(self, trino_credentials):
225225
assert len(result) == 1
226226
assert "detected" in result.columns
227227
assert result["detected"].iloc[0] == test_value
228+
229+
def test_execute_sql_with_struct_types(self, trino_toolkit_connection):
    """
    Test execute_sql with Trino STRUCT/ROW types
    (regression reported in BLU-5140)

    Named structs from Trino come through as NamedRowTuple (tuple subclass).
    The rendering layer will convert them to JSON via safe_convert_to_string.

    The query is ordered by id so that the first returned row is
    deterministically the id = 1 row that the assertions below inspect.
    """
    # ORDER BY is required: without it SQL gives no guarantee about row order,
    # and the checks on .iloc[0] would depend on engine scheduling.
    query = """
        SELECT id, simple_struct FROM (
            SELECT
                t.id,
                CAST(
                    ROW(
                        'item_' || CAST(t.id AS VARCHAR),
                        'value_' || CAST(t.id * 10 AS VARCHAR)
                    )
                    AS ROW(a VARCHAR, b VARCHAR)
                ) AS simple_struct
            FROM
                UNNEST(SEQUENCE(1, 100)) AS t (id)
        )
        ORDER BY id
    """

    result = execute_sql(
        template=query,
        sql_alchemy_json_env_var=trino_toolkit_connection,
    )

    assert isinstance(result, pd.DataFrame)
    assert len(result) == 100
    assert "id" in result.columns
    assert "simple_struct" in result.columns

    # Named structs from Trino come through as NamedRowTuple (tuple subclass)
    first_struct = result["simple_struct"].iloc[0]
    assert isinstance(
        first_struct, tuple
    ), f"Expected named struct to be tuple, got {type(first_struct)}"

    # Positional access to the two ROW fields
    assert len(first_struct) == 2
    assert first_struct[0] == "item_1"
    assert first_struct[1] == "value_10"

    # Access by the field names declared in the CAST (a, b)
    assert first_struct.a == "item_1"
    assert first_struct.b == "value_10"
275+
276+
def test_execute_sql_with_array_types(self, trino_toolkit_connection):
    """
    Test execute_sql with Trino ARRAY types
    (related to BLU-5140)

    Arrays from Trino come through as Python lists.
    The rendering layer will convert them to JSON via safe_convert_to_string.
    Without proper handling, str(list) produces invalid JSON with single quotes.

    The query is ordered by id so that the first returned row is
    deterministically the id = 1 row that the assertions below inspect.
    """
    # ORDER BY is required: without it SQL gives no guarantee about row order,
    # and the checks on .iloc[0] would depend on engine scheduling.
    query = """
        SELECT
            id,
            tags,
            nested_array
        FROM (
            SELECT
                t.id,
                ARRAY['tag_' || CAST(t.id AS VARCHAR), 'item', 'test'] AS tags,
                ARRAY[ARRAY[t.id, t.id * 2], ARRAY[t.id * 3, t.id * 4]] AS nested_array
            FROM
                UNNEST(SEQUENCE(1, 50)) AS t (id)
        )
        ORDER BY id
    """

    result = execute_sql(
        template=query,
        sql_alchemy_json_env_var=trino_toolkit_connection,
    )

    assert isinstance(result, pd.DataFrame)
    assert len(result) == 50
    assert "id" in result.columns
    assert "tags" in result.columns
    assert "nested_array" in result.columns

    # Arrays from Trino come through as Python lists
    first_tags = result["tags"].iloc[0]
    assert isinstance(
        first_tags, list
    ), f"Expected array to be list, got {type(first_tags)}"

    assert len(first_tags) == 3
    assert first_tags[0] == "tag_1"
    assert first_tags[1] == "item"
    assert first_tags[2] == "test"

    # Nested arrays are expected as lists of lists
    first_nested = result["nested_array"].iloc[0]
    assert isinstance(
        first_nested, list
    ), f"Expected nested array to be list, got {type(first_nested)}"
    assert len(first_nested) == 2
    assert isinstance(first_nested[0], list)
    assert first_nested[0] == [1, 2]
    assert first_nested[1] == [3, 4]
Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
import json
2+
3+
import pytest
4+
5+
from deepnote_toolkit.ocelots.pandas.utils import safe_convert_to_string
6+
7+
8+
def test_safe_convert_to_string_dict():
    """A flat dict is rendered as a JSON string that parses back to an equal dict."""
    original = {"a": "x", "b": "y"}

    rendered = safe_convert_to_string(original)

    assert isinstance(rendered, str)
    assert json.loads(rendered) == original
15+
16+
17+
def test_safe_convert_to_string_tuple():
    """A tuple is rendered as a JSON string; parsing it back yields a list."""
    original = (1, "x", True)

    rendered = safe_convert_to_string(original)

    assert isinstance(rendered, str)
    # JSON has no tuple type, so the decoded value is compared against a list.
    assert json.loads(rendered) == [1, "x", True]
24+
25+
26+
def test_safe_convert_to_string_list():
    """A list of strings is rendered as a JSON string that parses back unchanged."""
    original = ["a", "b", "c"]

    rendered = safe_convert_to_string(original)

    assert isinstance(rendered, str)
    assert json.loads(rendered) == original
33+
34+
35+
def test_safe_convert_to_string_nested_structures():
    """A dict containing a nested dict and list round-trips through JSON."""
    original = {"key": "value", "nested": {"inner": [1, 2, 3]}}

    decoded = json.loads(safe_convert_to_string(original))

    assert decoded == original
41+
42+
43+
def test_safe_convert_to_string_regular_values():
    """Non-collection values are expected to come back in their plain str() form."""
    expectations = [
        ("hello", "hello"),
        (42, "42"),
        (3.14, "3.14"),
        (True, "True"),
        (None, "None"),
    ]

    for raw, expected in expectations:
        assert safe_convert_to_string(raw) == expected
52+
53+
54+
def test_safe_convert_to_string_unconvertible():
    """An object whose __str__ and __repr__ both raise yields the placeholder string."""

    class _AlwaysRaises:
        def __repr__(self):
            raise ValueError("Cannot represent")

        def __str__(self):
            raise ValueError("Cannot convert")

    assert safe_convert_to_string(_AlwaysRaises()) == "<unconvertible>"
65+
66+
67+
# Tests for Trino-specific types
68+
def test_safe_convert_to_string_trino_namedrowtuple():
    """Test that Trino's NamedRowTuple is converted to a valid JSON string."""
    # Skip when the optional trino client is not installed.
    pytest.importorskip("trino")
    from trino.client import NamedRowTuple

    # Construct a row with field names and values (as returned by Trino)
    field_values = ["item_1", "value_10"]
    named_row = NamedRowTuple(
        values=field_values, names=["a", "b"], types=[None, None]
    )

    rendered = safe_convert_to_string(named_row)

    assert isinstance(rendered, str)
    # The decoded output is a plain list of the values; field names are not in it.
    assert json.loads(rendered) == ["item_1", "value_10"]
    # Attribute access by field name still works on the row object itself.
    assert named_row.a == "item_1"
    assert named_row.b == "value_10"
85+
86+
87+
def test_safe_convert_to_string_trino_array():
    """Test that Trino arrays (returned as Python lists) are converted to valid JSON."""
    # Trino returns ARRAY types as Python lists
    tags = ["tag_1", "item", "test"]

    rendered = safe_convert_to_string(tags)

    assert isinstance(rendered, str)
    assert json.loads(rendered) == tags
    # Strings must be double-quoted (JSON), not single-quoted (Python repr).
    assert '"tag_1"' in rendered
    assert "'tag_1'" not in rendered
101+
102+
103+
def test_safe_convert_to_string_trino_nested_array():
    """Test that nested Trino arrays are converted to valid JSON."""
    # Trino returns nested ARRAY types as nested Python lists
    matrix = [[1, 2], [3, 4]]

    decoded = json.loads(safe_convert_to_string(matrix))

    assert decoded == matrix
    first_row, second_row = decoded[0], decoded[1]
    assert first_row == [1, 2]
    assert second_row == [3, 4]

0 commit comments

Comments
 (0)