32 changes: 31 additions & 1 deletion superset/result_set.py
@@ -99,6 +99,31 @@ def convert_to_string(value: Any) -> str:
    return str(value)


def normalize_column_name(value: Any, index: int) -> str:
"""
Normalize a column name from the cursor description.

Some databases (e.g., MSSQL) return empty strings for unnamed columns
(e.g., SELECT COUNT(*) without an alias). This function ensures every
column has a valid, non-empty name by generating a positional fallback
name when needed.

:param value: The column name from cursor.description (can be str, bytes, None, etc.)
:param index: The 0-based column position, used to generate fallback names
:return: A non-empty string column name
"""
if value is None:
return f"_col{index}"

name = convert_to_string(value)

Suggestion: Decoding column names can raise UnicodeDecodeError when convert_to_string tries to decode non-UTF-8 bytes; catch decoding errors and fall back to a safe decode (errors='replace') or a plain str conversion so the normalization step never raises. [possible bug]

Severity Level: Major ⚠️
- ❌ SupersetResultSet fails on non-UTF8 column names.
- ⚠️ Query display can error for weird driver encodings.
- ⚠️ Downstream callers may see exceptions during result parsing.
Suggested change
    name = convert_to_string(value)
    try:
        name = convert_to_string(value)
    except UnicodeDecodeError:
        # Fallback for bytes that are not valid UTF-8: replace invalid bytes
        if isinstance(value, (bytes, bytearray)):
            name = value.decode("utf-8", errors="replace")
        else:
            name = str(value)
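
As a quick aside (not part of the suggested change), the two fallback paths in the suggestion behave differently on the non-UTF-8 byte string used in the reproduction steps below:

raw = b"\xff"  # not valid UTF-8

try:
    raw.decode("utf-8")  # strict decoding raises
except UnicodeDecodeError as exc:
    print(exc)  # "'utf-8' codec can't decode byte 0xff in position 0: invalid start byte"

print(raw.decode("utf-8", errors="replace"))  # "\ufffd" (replacement character)
print(str(raw))                               # "b'\\xff'"

Either result is a usable, non-empty column name, so `normalize_column_name` would not need its positional fallback in this case.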
Steps of Reproduction ✅
1. Open `superset/result_set.py` and find `convert_to_string` at
`superset/result_set.py:89-99` and `normalize_column_name` at
`superset/result_set.py:102-124`.

2. Create a cursor description entry containing non-UTF8 bytes, e.g. `cursor_description =
[(b"\xff", None, None, None, None, None, None)]`.

3. Instantiate `SupersetResultSet` in a test or REPL:

   `SupersetResultSet(data=[], cursor_description=cursor_description,
   db_engine_spec=BaseEngineSpec)` (`__init__` begins at `superset/result_set.py:128`).

4. During name normalization the code calls `convert_to_string` (invoked from
`normalize_column_name` at `superset/result_set.py:118`), which executes the bytes decode
at `superset/result_set.py:96-97` and raises `UnicodeDecodeError` for invalid UTF-8 bytes,
causing `SupersetResultSet.__init__` to error and abort result construction (a minimal sketch of these steps follows below).
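
A minimal pytest-style sketch of these reproduction steps; the test name is hypothetical and not part of this PR, and it assumes the constructor call style used by the existing unit tests and that `convert_to_string` decodes bytes strictly, as the comment claims:

import pytest

from superset.db_engine_specs.base import BaseEngineSpec
from superset.result_set import SupersetResultSet


def test_non_utf8_column_name_raises() -> None:
    # b"\xff" is not valid UTF-8, so strict decoding of the column name fails
    cursor_description = [(b"\xff", None, None, None, None, None, None)]
    with pytest.raises(UnicodeDecodeError):
        SupersetResultSet([], cursor_description, BaseEngineSpec)  # type: ignore

If the suggested try/except fallback is applied, this construction would instead succeed and the column name would come from the replacement-decoded string.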
Prompt for AI Agent 🤖
This is a comment left during a code review.

**Path:** superset/result_set.py
**Line:** 118:118
**Comment:**
	*Possible Bug: Decoding column names can raise UnicodeDecodeError when `convert_to_string` tries to decode non-UTF-8 bytes; catch decoding errors and fall back to a safe decode (errors='replace') or a plain str conversion so the normalization step never raises.

Validate the correctness of the flagged issue. If correct, How can I resolve this? If you propose a fix, implement it and please make it concise.


    # Handle empty or whitespace-only names
    if not name or not name.strip():
        return f"_col{index}"

    return name


class SupersetResultSet:
    def __init__( # pylint: disable=too-many-locals # noqa: C901
        self,
@@ -116,8 +141,13 @@ def __init__( # pylint: disable=too-many-locals # noqa: C901

        if cursor_description:
            # get deduped list of column names
            # Use normalize_column_name to handle None/empty names from databases
            # like MSSQL that return empty strings for unnamed columns
            column_names = dedup(
                [convert_to_string(col[0]) for col in cursor_description]
                [
                    normalize_column_name(col[0], idx)
                    for idx, col in enumerate(cursor_description)
                ]
            )
Comment on lines 146 to 151

Suggestion: Accessing col[0] without validating that col is a sequence (or non-None) can raise TypeError/IndexError if a cursor description entry is None or not indexable; iterate and safely extract the 0th element before calling normalize_column_name. [possible bug]

Severity Level: Major ⚠️
- ❌ SupersetResultSet construction fails on malformed cursor descriptions.
- ⚠️ Query rendering flows may error when cursor.description is malformed.
- ⚠️ Alerts/dimension creation pipelines using result set fail.
Suggested change
            column_names = dedup(
                [convert_to_string(col[0]) for col in cursor_description]
                [
                    normalize_column_name(col[0], idx)
                    for idx, col in enumerate(cursor_description)
                ]
            )
            column_names_values: list[str] = []
            for idx, col in enumerate(cursor_description):
                try:
                    name_value = col[0]
                except Exception:
                    # description entry missing or not indexable; treat as unnamed
                    name_value = None
                column_names_values.append(normalize_column_name(name_value, idx))
            column_names = dedup(column_names_values)
Steps of Reproduction ✅
1. In a Python REPL or unit test import the class: open `superset/result_set.py` and
locate `SupersetResultSet.__init__` at `superset/result_set.py:128` (constructor start).

2. Construct a malformed cursor description and invoke the constructor directly:

   - Create `cursor_description = [None]` or `cursor_description = [123]` in the test.

   - Call `SupersetResultSet(data=[], cursor_description=cursor_description,
   db_engine_spec=BaseEngineSpec)` (the call site is the `__init__` at
   `superset/result_set.py:128`).

3. When the list comprehension at `superset/result_set.py:146-150` executes, it tries to
evaluate `col[0]` (the call to `normalize_column_name(col[0], idx)` is at
`superset/result_set.py:148`), which raises:

   - `TypeError: 'NoneType' object is not subscriptable` for `None`, or

   - `TypeError: 'int' object is not subscriptable` for `123`.

4. Observe the exception bubbles out of `SupersetResultSet.__init__` and prevents
result-set construction; this reproduces the failure caused by unguarded `col[0]` access.

   - Note: This is reproducible without other parts of Superset by directly calling the
   constructor described above. A minimal sketch of this reproduction follows below.
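
A minimal sketch of the reproduction; the test name is hypothetical, and the constructor call style mirrors the existing unit tests:

import pytest

from superset.db_engine_specs.base import BaseEngineSpec
from superset.result_set import SupersetResultSet


def test_malformed_cursor_description_entry() -> None:
    # None is not indexable, so evaluating col[0] in the column-name
    # list comprehension raises TypeError
    with pytest.raises(TypeError):
        SupersetResultSet([], [None], BaseEngineSpec)  # type: ignore

With the suggested guarded extraction, the entry would instead be treated as unnamed and fall back to `_col0`.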
Prompt for AI Agent 🤖
This is a comment left during a code review.

**Path:** superset/result_set.py
**Line:** 146:151
**Comment:**
	*Possible Bug: Accessing `col[0]` without validating that `col` is a sequence (or non-None) can raise TypeError/IndexError if a cursor description entry is None or not indexable; iterate and safely extract the 0th element before calling `normalize_column_name`.

Validate the correctness of the flagged issue. If correct, How can I resolve this? If you propose a fix, implement it and please make it concise.


            # fix cursor descriptor with the deduped names
204 changes: 203 additions & 1 deletion tests/unit_tests/result_set_test.py
@@ -25,10 +25,40 @@
from pytest_mock import MockerFixture

from superset.db_engine_specs.base import BaseEngineSpec
from superset.result_set import stringify_values, SupersetResultSet
from superset.result_set import normalize_column_name, stringify_values, SupersetResultSet
from superset.superset_typing import DbapiResult


def test_normalize_column_name_with_valid_string() -> None:
    """Test that valid string column names are preserved."""
    assert normalize_column_name("user_id", 0) == "user_id"
    assert normalize_column_name("COUNT(*)", 1) == "COUNT(*)"
    assert normalize_column_name("my column", 2) == "my column"


def test_normalize_column_name_with_bytes() -> None:
    """Test that byte column names are decoded."""
    assert normalize_column_name(b"column_name", 0) == "column_name"


def test_normalize_column_name_with_none() -> None:
    """Test that None column names get positional fallback."""
    assert normalize_column_name(None, 0) == "_col0"
    assert normalize_column_name(None, 5) == "_col5"


def test_normalize_column_name_with_empty_string() -> None:
    """Test that empty string column names get positional fallback."""
    assert normalize_column_name("", 0) == "_col0"
    assert normalize_column_name("", 3) == "_col3"


def test_normalize_column_name_with_whitespace() -> None:
    """Test that whitespace-only column names get positional fallback."""
    assert normalize_column_name(" ", 0) == "_col0"
    assert normalize_column_name("\t\n", 1) == "_col1"


def test_column_names_as_bytes() -> None:
"""
Test that we can handle column names as bytes.
@@ -185,3 +215,175 @@ def test_get_column_description_from_empty_data_using_cursor_description(
    )
    assert any(col.get("column_name") == "__time" for col in result_set.columns)
    logger.exception.assert_not_called()


def test_unnamed_columns_get_fallback_names() -> None:
    """
    Test that unnamed columns (empty string or None) get deterministic fallback names.

    This addresses issue #23848 where MSSQL returns empty strings for unnamed columns
    (e.g., SELECT COUNT(*) without an alias), causing failures in SQL Lab, alerts,
    and dimension creation.
    """
    # Simulate MSSQL behavior: empty string for unnamed column
    data = [(42,), (100,)]
    description = [("", "int", None, None, None, None, None)]  # Empty column name

    result_set = SupersetResultSet(data, description, BaseEngineSpec)  # type: ignore

    # Should have generated a fallback name
    assert result_set.columns[0]["column_name"] == "_col0"
    assert result_set.columns[0]["name"] == "_col0"

    # Data should be accessible
    df = result_set.to_pandas_df()
    assert df.columns.tolist() == ["_col0"]
    assert df["_col0"].tolist() == [42, 100]


def test_none_column_name_gets_fallback() -> None:
    """
    Test that None column names get deterministic fallback names.
    """
    data = [("value1",), ("value2",)]
    description = [(None, "varchar", None, None, None, None, None)]

    result_set = SupersetResultSet(data, description, BaseEngineSpec)  # type: ignore

    assert result_set.columns[0]["column_name"] == "_col0"
    df = result_set.to_pandas_df()
    assert df["_col0"].tolist() == ["value1", "value2"]


def test_whitespace_only_column_name_gets_fallback() -> None:
    """
    Test that whitespace-only column names get deterministic fallback names.
    """
    data = [(1,), (2,)]
    description = [(" ", "int", None, None, None, None, None)]

    result_set = SupersetResultSet(data, description, BaseEngineSpec)  # type: ignore

    assert result_set.columns[0]["column_name"] == "_col0"


def test_mixed_named_and_unnamed_columns() -> None:
    """
    Test that named columns are preserved while unnamed columns get fallback names.

    Simulates: SELECT id, COUNT(*), name FROM table
    where COUNT(*) has no alias on MSSQL.
    """
    data = [(1, 42, "Alice"), (2, 17, "Bob")]
    description = [
        ("id", "int", None, None, None, None, None),
        ("", "int", None, None, None, None, None),  # COUNT(*) without alias
        ("name", "varchar", None, None, None, None, None),
    ]

    result_set = SupersetResultSet(data, description, BaseEngineSpec)  # type: ignore

    # Named columns should be preserved exactly
    assert result_set.columns[0]["column_name"] == "id"
    assert result_set.columns[2]["column_name"] == "name"

    # Unnamed column should get positional fallback
    assert result_set.columns[1]["column_name"] == "_col1"

    # Verify data integrity
    df = result_set.to_pandas_df()
    assert df.columns.tolist() == ["id", "_col1", "name"]
    assert df["id"].tolist() == [1, 2]
    assert df["_col1"].tolist() == [42, 17]
    assert df["name"].tolist() == ["Alice", "Bob"]


def test_multiple_unnamed_columns() -> None:
    """
    Test that multiple unnamed columns each get unique fallback names.

    Simulates: SELECT COUNT(*), SUM(x), AVG(y) FROM table
    without any aliases on MSSQL.
    """
    data = [(10, 100, 5.5)]
    description = [
        ("", "int", None, None, None, None, None),
        ("", "int", None, None, None, None, None),
        ("", "float", None, None, None, None, None),
    ]

    result_set = SupersetResultSet(data, description, BaseEngineSpec)  # type: ignore

    # Each unnamed column should get a unique positional name
    # Note: dedup() will handle any collisions, but since we use position-based
    # names, there shouldn't be collisions
    column_names = [col["column_name"] for col in result_set.columns]
    assert column_names == ["_col0", "_col1", "_col2"]

    # Verify data is accessible
    df = result_set.to_pandas_df()
    assert df["_col0"].tolist() == [10]
    assert df["_col1"].tolist() == [100]
    assert df["_col2"].tolist() == [5.5]


def test_named_columns_not_modified() -> None:
    """
    Test that explicitly named columns are never modified.

    This ensures the fix doesn't accidentally change behavior for well-formed queries.
    """
    data = [(1, "test", 3.14)]
    description = [
        ("user_id", "int", None, None, None, None, None),
        ("username", "varchar", None, None, None, None, None),
        ("score", "float", None, None, None, None, None),
    ]

    result_set = SupersetResultSet(data, description, BaseEngineSpec)  # type: ignore

    column_names = [col["column_name"] for col in result_set.columns]
    assert column_names == ["user_id", "username", "score"]


def test_empty_result_with_unnamed_columns() -> None:
    """
    Test that empty results with unnamed columns still work correctly.

    This is important for SQL Lab's display of column headers even when
    the query returns no rows.
    """
    data: DbapiResult = []
    description = [
        ("", "int", None, None, None, None, None),
        ("named_col", "varchar", None, None, None, None, None),
    ]

    result_set = SupersetResultSet(data, description, BaseEngineSpec)  # type: ignore

    column_names = [col["column_name"] for col in result_set.columns]
    assert column_names == ["_col0", "named_col"]


def test_aliased_expression_preserved() -> None:
    """
    Test that explicitly aliased expressions (e.g., SELECT COUNT(*) AS total)
    preserve the alias name exactly.

    This verifies the fix for #23848 doesn't accidentally affect properly
    aliased columns.
    """
    # Simulates: SELECT COUNT(*) AS total FROM table
    # The database returns "total" as the column name
    data = [(42,)]
    description = [("total", "int", None, None, None, None, None)]

    result_set = SupersetResultSet(data, description, BaseEngineSpec)  # type: ignore

    # Alias must be preserved exactly
    assert result_set.columns[0]["column_name"] == "total"
    assert result_set.columns[0]["name"] == "total"

    df = result_set.to_pandas_df()
    assert df.columns.tolist() == ["total"]
    assert df["total"].tolist() == [42]