Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 17 additions & 1 deletion superset/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,22 @@ def _convert_big_integers(val: Any) -> Any:
return str(val) if isinstance(val, int) and abs(val) > JS_MAX_INTEGER else val


def _is_na(val: Any) -> bool:
"""
Check if a value is NA/NaN for scalar values only.

pd.isna() raises ValueError for arrays/lists, so we catch that case.

:param val: the value to check
:returns: True if the value is NA/NaN, False otherwise
"""
try:
return bool(pd.isna(val))
except ValueError:
# pd.isna raises ValueError for arrays (e.g., lists, dicts from JSON)
return False


def df_to_records(dframe: pd.DataFrame) -> list[dict[str, Any]]:
"""
Convert a DataFrame to a set of records.
Expand All @@ -56,7 +72,7 @@ def df_to_records(dframe: pd.DataFrame) -> list[dict[str, Any]]:
for record in records:
for key in record:
record[key] = (
None if pd.isna(record[key]) else _convert_big_integers(record[key])
None if _is_na(record[key]) else _convert_big_integers(record[key])
)

return records
29 changes: 25 additions & 4 deletions superset/result_set.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,8 @@ def __init__( # pylint: disable=too-many-locals # noqa: C901
deduped_cursor_desc: list[tuple[Any, ...]] = []
numpy_dtype: list[tuple[str, ...]] = []
stringified_arr: NDArray[Any]
# Track columns with nested/JSON data to preserve them as objects
self._nested_columns: dict[str, list[Any]] = {}

if cursor_description:
# get deduped list of column names
Expand Down Expand Up @@ -147,16 +149,29 @@ def __init__( # pylint: disable=too-many-locals # noqa: C901
TypeError, # this is super hackey,
# https://issues.apache.org/jira/browse/ARROW-7855
):
# Check if original data has nested types (lists/dicts)
# before stringifying, since stringification removes
# the nested structure that the second loop relies on
# to detect via pa.types.is_nested().
original_values = array[column].tolist()
if any(
isinstance(v, (list, dict))
for v in original_values
if v is not None
):
self._nested_columns[column] = original_values
# attempt serialization of values as strings
stringified_arr = stringify_values(array[column])
pa_data.append(pa.array(stringified_arr.tolist()))

if pa_data: # pylint: disable=too-many-nested-blocks
for i, column in enumerate(column_names):
if pa.types.is_nested(pa_data[i].type):
# TODO: revisit nested column serialization once nested types
# are added as a natively supported column type in Superset
# (superset.utils.core.GenericDataType).
# Preserve nested/JSON data as Python objects for use in
# templates like Handlebars. Store original values before
# stringifying for PyArrow compatibility.
# See: https://github.com/apache/superset/issues/25125
self._nested_columns[column] = array[column].tolist()
stringified_arr = stringify_values(array[column])
pa_data[i] = pa.array(stringified_arr.tolist())

Expand Down Expand Up @@ -247,7 +262,13 @@ def data_type(self, col_name: str, pa_dtype: pa.DataType) -> Optional[str]:
return None

def to_pandas_df(self) -> pd.DataFrame:
return self.convert_table_to_df(self.table)
df = self.convert_table_to_df(self.table)
# Restore nested/JSON columns as Python objects instead of strings
# This allows JSON data to be used directly in templates like Handlebars
for column, values in self._nested_columns.items():
if column in df.columns:
df[column] = values
return df

@property
def pa_table(self) -> pa.Table:
Expand Down
34 changes: 26 additions & 8 deletions tests/integration_tests/result_set_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,18 +226,19 @@ def test_nested_types(self):
assert results.columns[3]["type"] == "STRING"
assert results.columns[3]["type_generic"] == GenericDataType.STRING
df = results.to_pandas_df()
# JSON/JSONB data is preserved as objects instead of being stringified
assert df_to_records(df) == [
{
"id": 4,
"dict_arr": '[{"table_name": "unicode_test", "database_id": 1}]',
"num_arr": "[1, 2, 3]",
"map_col": "{'chart_name': 'scatter'}",
"dict_arr": [{"table_name": "unicode_test", "database_id": 1}],
"num_arr": [1, 2, 3],
"map_col": {"chart_name": "scatter"},
},
{
"id": 3,
"dict_arr": '[{"table_name": "birth_names", "database_id": 1}]',
"num_arr": "[4, 5, 6]",
"map_col": "{'chart_name': 'plot'}",
"dict_arr": [{"table_name": "birth_names", "database_id": 1}],
"num_arr": [4, 5, 6],
"map_col": {"chart_name": "plot"},
},
]

Expand Down Expand Up @@ -267,9 +268,25 @@ def test_single_column_multidim_nested_types(self):
assert results.columns[0]["type"] == "STRING"
assert results.columns[0]["type_generic"] == GenericDataType.STRING
df = results.to_pandas_df()
# JSON/JSONB data is preserved as objects instead of being stringified
assert df_to_records(df) == [
{
"metadata": '["test", [["foo", 123456, [[["test"], 3432546, 7657658766], [["fake"], 656756765, 324324324324]]]], ["test2", 43, 765765765], null, null]' # noqa: E501
"metadata": [
"test",
[
[
"foo",
123456,
[
[["test"], 3432546, 7657658766],
[["fake"], 656756765, 324324324324],
],
]
],
["test2", 43, 765765765],
None,
None,
]
}
]

Expand All @@ -280,7 +297,8 @@ def test_nested_list_types(self):
assert results.columns[0]["type"] == "STRING"
assert results.columns[0]["type_generic"] == GenericDataType.STRING
df = results.to_pandas_df()
assert df_to_records(df) == [{"metadata": '[{"TestKey": [123456, "foo"]}]'}]
# JSON/JSONB data is preserved as objects instead of being stringified
assert df_to_records(df) == [{"metadata": [{"TestKey": [123456, "foo"]}]}]

def test_empty_datetime(self):
data = [(None,)]
Expand Down
88 changes: 88 additions & 0 deletions tests/unit_tests/result_set_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,3 +185,91 @@ def test_get_column_description_from_empty_data_using_cursor_description(
)
assert any(col.get("column_name") == "__time" for col in result_set.columns)
logger.exception.assert_not_called()


def test_json_data_type_preserved_as_objects() -> None:
    """
    Verify that JSON/JSONB values survive the result-set round trip as
    Python objects (dicts/lists) rather than serialized strings.

    Handlebars templates and other consumers need to traverse JSON
    payloads directly, which is impossible once they are stringified.

    See: https://github.com/apache/superset/issues/25125
    """
    # psycopg2 hands JSONB columns back as Python dicts, so mimic that here
    rows = [
        (1, {"key": "value1", "nested": {"a": 1}}, "text1"),
        (2, {"key": "value2", "items": [1, 2, 3]}, "text2"),
        (3, None, "text3"),
        (4, {"mixed": "string"}, "text4"),
    ]
    cursor_description = [
        ("id", 23, None, None, None, None, None),  # INT
        ("json_col", 3802, None, None, None, None, None),  # JSONB
        ("text_col", 1043, None, None, None, None, None),  # VARCHAR
    ]
    rs = SupersetResultSet(rows, cursor_description, BaseEngineSpec)  # type: ignore
    frame = rs.to_pandas_df()

    # Each JSON cell comes back as the original Python object, not a string
    json_cells = frame["json_col"]
    assert isinstance(json_cells.iloc[0], dict)
    assert json_cells.iloc[0] == {"key": "value1", "nested": {"a": 1}}
    assert json_cells.iloc[1] == {"key": "value2", "items": [1, 2, 3]}
    assert json_cells.iloc[2] is None
    assert json_cells.iloc[3] == {"mixed": "string"}

    # The objects must still round-trip through JSON for API responses
    from superset.utils import json as superset_json

    round_tripped = superset_json.loads(
        superset_json.dumps(frame.to_dict(orient="records"))
    )
    assert round_tripped[0]["json_col"]["key"] == "value1"
    assert round_tripped[0]["json_col"]["nested"]["a"] == 1
    assert round_tripped[1]["json_col"]["items"] == [1, 2, 3]


def test_json_data_with_homogeneous_structure() -> None:
    """
    JSON columns whose rows all share one structure are preserved as
    Python objects too, not only heterogeneous ones.
    """
    # Every row carries the same JSON shape
    rows = [
        (1, {"name": "Alice", "age": 30}),
        (2, {"name": "Bob", "age": 25}),
        (3, {"name": "Charlie", "age": 35}),
    ]
    cursor_description = [
        ("id", 23, None, None, None, None, None),
        ("data", 3802, None, None, None, None, None),
    ]
    rs = SupersetResultSet(rows, cursor_description, BaseEngineSpec)  # type: ignore
    frame = rs.to_pandas_df()

    # Rows stay dicts with their original fields intact
    first = frame["data"].iloc[0]
    assert isinstance(first, dict)
    assert first["name"] == "Alice"
    assert frame["data"].iloc[1]["age"] == 25


def test_array_data_type_preserved() -> None:
    """
    SQL array values are likewise preserved as Python lists.
    """
    rows = [
        (1, [1, 2, 3]),
        (2, [4, 5, 6]),
        (3, None),
    ]
    cursor_description = [
        ("id", 23, None, None, None, None, None),
        ("arr", 1007, None, None, None, None, None),  # INT ARRAY
    ]
    rs = SupersetResultSet(rows, cursor_description, BaseEngineSpec)  # type: ignore
    frame = rs.to_pandas_df()

    # List cells are genuine lists; NULL rows stay None
    first_cell = frame["arr"].iloc[0]
    assert isinstance(first_cell, list)
    assert first_cell == [1, 2, 3]
    assert frame["arr"].iloc[2] is None
Loading