Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion superset/charts/client_processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -389,7 +389,9 @@ def apply_client_processing( # noqa: C901
query["data"] = processed_df.to_dict()
elif query["result_format"] == ChartDataResultFormat.CSV:
buf = StringIO()
processed_df.to_csv(buf, index=show_default_index)
# Apply CSV_EXPORT config for consistent CSV formatting
csv_export_config = current_app.config.get("CSV_EXPORT", {})
processed_df.to_csv(buf, index=show_default_index, **csv_export_config)
buf.seek(0)
query["data"] = buf.getvalue()

Expand Down
49 changes: 46 additions & 3 deletions superset/commands/streaming_export/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,16 +107,49 @@ def _write_csv_header(
buffer.truncate()
return header_data, total_bytes

def _format_row_values(
self, row: tuple[Any, ...], decimal_separator: str | None
) -> list[Any]:
"""
Format row values, applying custom decimal separator if specified.

Args:
row: Database row as a tuple
decimal_separator: Custom decimal separator (e.g., ",") or None

Returns:
List of formatted values
"""
if not decimal_separator or decimal_separator == ".":
return list(row)

formatted = []
for value in row:
if isinstance(value, float):
# Format float with custom decimal separator
Comment on lines +126 to +129
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggestion: The decimal separator configuration is only applied to values of type float, so if the database driver returns numeric columns as other numeric types like decimal.Decimal (which is common for NUMERIC/DECIMAL columns), those values will bypass _format_row_values and still use a dot as decimal separator, causing streaming CSV exports to ignore the configured decimal separator for many numeric columns; broaden the type check to cover generic numeric types so all numeric values respect the decimal setting. [logic error]

Severity Level: Major ⚠️
- ❌ Streaming CSV from SQL Lab misformats DECIMAL numeric columns.
- ❌ Chart data streaming export misformats DECIMAL metrics with comma.
- ⚠️ Locale-specific tools misparse numbers due to dot decimal.
Suggested change
formatted = []
for value in row:
if isinstance(value, float):
# Format float with custom decimal separator
from numbers import Real
formatted: list[Any] = []
for value in row:
if isinstance(value, Real):
# Format numeric values with custom decimal separator
Steps of Reproduction ✅
1. Configure Superset to use a custom decimal separator by setting `CSV_EXPORT =
{"encoding": "utf-8", "sep": ";", "decimal": ","}` in `superset_config.py` (this
configuration is read in `superset/commands/streaming_export/base.py:195-197` when
`_execute_query_and_stream()` obtains `decimal_separator`).

2. In a supported database where SQLAlchemy maps NUMERIC/DECIMAL columns to
`decimal.Decimal` (standard behavior for many engines), create or use a table with a
DECIMAL/NUMERIC column and insert a value like `1234.56`; query it either via a chart or
SQL Lab so that Superset executes a SELECT returning that DECIMAL column.

3. Trigger a streaming CSV export for that query: for charts, the export path uses
`StreamingCSVExportCommand` from
`superset/commands/chart/data/streaming_export_command.py:29` which subclasses
`BaseStreamingCSVExportCommand`; for SQL Lab, the API in `superset/sqllab/api.py:405`
constructs `StreamingSqlResultExportCommand(client_id, chunk_size)` (per Grep output)
which also subclasses `BaseStreamingCSVExportCommand`.

4. During the export, `BaseStreamingCSVExportCommand._execute_query_and_stream()` in
`superset/commands/streaming_export/base.py:187-243` streams rows; each batch is processed
by `_process_rows()` at lines 135-185, which calls `_format_row_values(row,
decimal_separator)` at lines 164-165 with `decimal_separator=","`. Because the current
implementation at lines 110-133 only checks `isinstance(value, float)`, any
`decimal.Decimal` values in the row are treated as non-floats and appended unchanged, so
`csv.writer` writes strings like `"1234.56"` with a dot instead of `"1234,56"`, ignoring
the configured decimal separator for these numeric columns.
Prompt for AI Agent 🤖
This is a comment left during a code review.

**Path:** superset/commands/streaming_export/base.py
**Line:** 126:129
**Comment:**
	*Logic Error: The decimal separator configuration is only applied to values of type `float`, so if the database driver returns numeric columns as other numeric types like `decimal.Decimal` (which is common for NUMERIC/DECIMAL columns), those values will bypass `_format_row_values` and still use a dot as decimal separator, causing streaming CSV exports to ignore the configured decimal separator for many numeric columns; broaden the type check to cover generic numeric types so all numeric values respect the `decimal` setting.

Validate the correctness of the flagged issue. If correct, How can I resolve this? If you propose a fix, implement it and please make it concise.
👍 | 👎

formatted.append(str(value).replace(".", decimal_separator))
else:
formatted.append(value)
return formatted

def _process_rows(
self,
result_proxy: Any,
csv_writer: Any,
buffer: io.StringIO,
limit: int | None,
decimal_separator: str | None = None,
) -> Generator[tuple[str, int, int], None, None]:
"""
Process database rows and yield CSV data chunks.

Args:
result_proxy: SQLAlchemy result proxy
csv_writer: CSV writer instance
buffer: StringIO buffer for CSV data
limit: Maximum number of rows to process, or None for unlimited
decimal_separator: Custom decimal separator (e.g., ",") or None

Yields tuples of (data_chunk, row_count, byte_count).
"""
row_count = 0
Expand All @@ -128,7 +161,9 @@ def _process_rows(
if limit is not None and row_count >= limit:
break

csv_writer.writerow(row)
# Format values with custom decimal separator if needed
formatted_row = self._format_row_values(row, decimal_separator)
csv_writer.writerow(formatted_row)
row_count += 1

# Check buffer size and flush if needed
Expand Down Expand Up @@ -156,6 +191,11 @@ def _execute_query_and_stream(
start_time = time.time()
total_bytes = 0

# Get CSV export configuration
csv_export_config = app.config.get("CSV_EXPORT", {})
delimiter = csv_export_config.get("sep", ",")
decimal_separator = csv_export_config.get("decimal", ".")

with db.session() as session:
# Merge database to prevent DetachedInstanceError
merged_database = session.merge(database)
Expand All @@ -170,8 +210,11 @@ def _execute_query_and_stream(
columns = list(result_proxy.keys())

# Use StringIO with csv.writer for proper escaping
# Apply delimiter from CSV_EXPORT config
buffer = io.StringIO()
csv_writer = csv.writer(buffer, quoting=csv.QUOTE_MINIMAL)
csv_writer = csv.writer(
buffer, delimiter=delimiter, quoting=csv.QUOTE_MINIMAL
)

# Write CSV header
header_data, header_bytes = self._write_csv_header(
Expand All @@ -183,7 +226,7 @@ def _execute_query_and_stream(
# Process rows and yield chunks
row_count = 0
for data_chunk, rows_processed, chunk_bytes in self._process_rows(
result_proxy, csv_writer, buffer, limit
result_proxy, csv_writer, buffer, limit, decimal_separator
):
total_bytes += chunk_bytes
row_count = rows_processed
Expand Down
99 changes: 99 additions & 0 deletions tests/unit_tests/charts/test_client_processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -2788,3 +2788,102 @@ def test_apply_client_processing_csv_format_default_na_behavior():
assert (
"Alice," in lines[2]
) # Second data row should have empty last_name (NA converted to null)


@with_config({"CSV_EXPORT": {"sep": ";", "decimal": ","}})
def test_apply_client_processing_csv_format_custom_separator():
    """
    Test that apply_client_processing respects CSV_EXPORT config
    for custom separator and decimal character.

    This is a regression test for GitHub issue #32371.
    """
    # CSV data with numeric values
    csv_data = "name,value\nAlice,1.5\nBob,2.75"

    result = {
        "queries": [
            {
                "result_format": ChartDataResultFormat.CSV,
                "data": csv_data,
            }
        ]
    }

    form_data = {
        "datasource": "1__table",
        "viz_type": "table",
        "slice_id": 1,
        "url_params": {},
        "metrics": [],
        "groupby": [],
        "columns": ["name", "value"],
        "extra_form_data": {},
        "force": False,
        "result_format": "csv",
        "result_type": "results",
    }

    processed_result = apply_client_processing(result, form_data)

    output_data = processed_result["queries"][0]["data"]
    lines = output_data.strip().split("\n")

    # With sep=";", the header must be exactly semicolon-separated.
    assert lines[0] == "name;value"
    # With decimal=",", numeric values must use a comma. Assert only the
    # comma form — the previous "or '1.5'" alternatives made the test pass
    # even when the decimal setting was ignored.
    assert "Alice;1,5" in lines[1]
    assert "Bob;2,75" in lines[2]
Comment on lines +2833 to +2836
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggestion: The regression test for custom CSV separator and decimal allows both comma and dot decimals, so it will still pass even if the CSV_EXPORT decimal setting is ignored, failing to detect regressions in numeric formatting. [logic error]

Severity Level: Major ⚠️
- ⚠️ Unit test for CSV decimal formatting may silently pass.
- ⚠️ Table CSV exports with custom decimal can regress unnoticed.
Suggested change
assert "name;value" in lines[0]
# With decimal=",", decimal values should use comma as separator
assert "Alice;1,5" in lines[1] or "Alice;1.5" in lines[1]
assert "Bob;2,75" in lines[2] or "Bob;2.75" in lines[2]
assert lines[0] == "name;value"
# With decimal=",", decimal values should use comma as separator
assert "Alice;1,5" in lines[1]
assert "Bob;2,75" in lines[2]
Steps of Reproduction ✅
1. Open `test_apply_client_processing_csv_format_custom_separator()` in
`tests/unit_tests/charts/test_client_processing.py:2793-2827` (added in this PR) which
uses `@with_config({"CSV_EXPORT": {"sep": ";", "decimal": ","}})` and calls
`apply_client_processing()`.

2. Note that `apply_client_processing()` in `superset/charts/client_processing.py:312-398`
formats CSV exports via `processed_df.to_csv(...)` using `csv_export_config =
current_app.config.get("CSV_EXPORT", {})` at lines 391-395.

3. Introduce a regression by editing `processed_df.to_csv(buf, index=show_default_index,
**csv_export_config)` at `superset/charts/client_processing.py:393-394` so that it no
longer passes the `decimal` option (e.g., only passes `sep`), causing numeric values in
the exported table CSV to retain a `.` decimal separator.

4. Run `pytest
tests/unit_tests/charts/test_client_processing.py::test_apply_client_processing_csv_format_custom_separator`;
observe the test still passes because the assertions at
`tests/unit_tests/charts/test_client_processing.py:2832-2836` accept both `"Alice;1,5"`
and `"Alice;1.5"` (and likewise for Bob), so the regression in honoring
`CSV_EXPORT["decimal"]` is not detected.
Prompt for AI Agent 🤖
This is a comment left during a code review.

**Path:** tests/unit_tests/charts/test_client_processing.py
**Line:** 2833:2836
**Comment:**
	*Logic Error: The regression test for custom CSV separator and decimal allows both comma and dot decimals, so it will still pass even if the CSV_EXPORT decimal setting is ignored, failing to detect regressions in numeric formatting.

Validate the correctness of the flagged issue. If correct, How can I resolve this? If you propose a fix, implement it and please make it concise.
👍 | 👎



@with_config({"CSV_EXPORT": {"sep": ";", "decimal": ","}})
def test_apply_client_processing_csv_pivot_table_custom_separator():
    """
    Test that apply_client_processing respects CSV_EXPORT config
    for pivot table exports with custom separator and decimal character.

    This is a regression test for GitHub issue #32371 - specifically for
    pivoted CSV exports which were not respecting the CSV_EXPORT config.
    """
    # CSV data with a numeric metric
    csv_data = "COUNT(metric)\n1234.56"

    result = {
        "queries": [
            {
                "result_format": ChartDataResultFormat.CSV,
                "data": csv_data,
            }
        ]
    }

    form_data = {
        "datasource": "1__table",
        "viz_type": "pivot_table_v2",
        "slice_id": 1,
        "url_params": {},
        "groupbyColumns": [],
        "groupbyRows": [],
        "metrics": [
            {
                "aggregate": "COUNT",
                "column": {"column_name": "metric"},
                "expressionType": "SIMPLE",
                "label": "COUNT(metric)",
            }
        ],
        "metricsLayout": "COLUMNS",
        "aggregateFunction": "Sum",
        "extra_form_data": {},
        "force": False,
        "result_format": "csv",
        "result_type": "results",
    }

    processed_result = apply_client_processing(result, form_data)

    output_data = processed_result["queries"][0]["data"]

    # The pivoted export must honor CSV_EXPORT. The previous assertion
    # (";" in output or "," in output) was vacuous for any CSV output.
    # Assert the semicolon separator is present and that the metric value
    # uses the comma decimal (and no longer the default dot form).
    assert ";" in output_data
    assert "1234,56" in output_data
    assert "1234.56" not in output_data
Comment on lines +2886 to +2889
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggestion: The pivot-table CSV regression test only asserts that the output contains either a semicolon or comma, which will usually be true regardless of whether the CSV_EXPORT separator/decimal configuration is honored, so it does not actually verify the intended behavior. [logic error]

Severity Level: Major ⚠️
- ⚠️ Pivot-table CSV regression test barely checks separator behavior.
- ⚠️ Pivot CSV_EXPORT settings regressions could slip into production.
Suggested change
# The output should use semicolon as separator and comma as decimal
# After pivoting, the format should reflect CSV_EXPORT settings
assert ";" in output_data or "," in output_data
lines = output_data.strip().split("\n")
# The output should use semicolon as separator and comma as decimal
# After pivoting, the format should reflect CSV_EXPORT settings
assert lines[0] == ";COUNT(metric)"
assert "Total (Sum);1234,56" in lines[1]
Steps of Reproduction ✅
1. Open `test_apply_client_processing_csv_pivot_table_custom_separator()` in
`tests/unit_tests/charts/test_client_processing.py:2839-2889`, which sets `CSV_EXPORT =
{"sep": ";", "decimal": ","}` and calls `apply_client_processing()` with
`viz_type="pivot_table_v2"` and `result_format="csv"`.

2. Inspect `apply_client_processing()` in `superset/charts/client_processing.py:312-398`,
where pivoted CSV results are produced by `pivot_table_v2()` and written via
`processed_df.to_csv(buf, index=show_default_index, **csv_export_config)` at lines 391-395
using the `CSV_EXPORT` configuration.

3. Introduce a regression by modifying the `to_csv` call in `apply_client_processing()`
(lines 391-395) so that it ignores `CSV_EXPORT` (e.g., remove `**csv_export_config` or
hard-code default `sep=","`, `decimal="."`), causing the pivoted CSV to use default
separators and decimals.

4. Run `pytest
tests/unit_tests/charts/test_client_processing.py::test_apply_client_processing_csv_pivot_table_custom_separator`;
observe that the test still passes, because the only assertion at
`tests/unit_tests/charts/test_client_processing.py:2883-2889` merely checks that the
output string contains either `";"` or `","`, which is true for any CSV output regardless
of whether `CSV_EXPORT` was honored.
Prompt for AI Agent 🤖
This is a comment left during a code review.

**Path:** tests/unit_tests/charts/test_client_processing.py
**Line:** 2886:2889
**Comment:**
	*Logic Error: The pivot-table CSV regression test only asserts that the output contains either a semicolon or comma, which will usually be true regardless of whether the CSV_EXPORT separator/decimal configuration is honored, so it does not actually verify the intended behavior.

Validate the correctness of the flagged issue. If correct, How can I resolve this? If you propose a fix, implement it and please make it concise.
👍 | 👎

143 changes: 143 additions & 0 deletions tests/unit_tests/commands/sql_lab/streaming_export_command_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -538,3 +538,146 @@ def test_null_values_handling(mocker, mock_query):
assert "1,,100" in csv_data
assert "2,test," in csv_data
assert ",," in csv_data


def test_csv_export_config_custom_separator(mocker, mock_query):
    """
    Test that streaming CSV export respects CSV_EXPORT config
    for custom separator (sep).

    This is a regression test for GitHub issue #32371.
    """
    mock_query.select_sql = "SELECT * FROM test"

    result_mock = MagicMock()
    result_mock.keys.return_value = ["id", "name"]
    # One populated batch followed by an empty batch ends the stream.
    result_mock.fetchmany.side_effect = [[(1, "Alice"), (2, "Bob")], []]

    _setup_sqllab_mocks(mocker, mock_query)

    conn_mock = MagicMock()
    conn_mock.execution_options.return_value.execute.return_value = result_mock
    conn_mock.__enter__.return_value = conn_mock
    conn_mock.__exit__.return_value = None

    engine_mock = MagicMock()
    engine_mock.connect.return_value = conn_mock
    mock_query.database.get_sqla_engine.return_value.__enter__.return_value = (
        engine_mock
    )

    # Pretend the app was configured with a semicolon column separator.
    mocker.patch(
        "superset.commands.streaming_export.base.app.config.get",
        return_value={"sep": ";", "encoding": "utf-8"},
    )

    command = StreamingSqlResultExportCommand("test_client_123")
    command.validate()

    stream_factory = command.run()
    output = "".join(stream_factory())

    # With sep=";", header and every data row are semicolon-delimited.
    assert "id;name" in output
    assert "1;Alice" in output
    assert "2;Bob" in output


def test_csv_export_config_custom_decimal(mocker, mock_query):
    """
    Test that streaming CSV export respects CSV_EXPORT config
    for custom decimal separator.

    This is a regression test for GitHub issue #32371.
    """
    mock_query.select_sql = "SELECT * FROM test"

    result_mock = MagicMock()
    result_mock.keys.return_value = ["id", "price"]
    # One populated batch followed by an empty batch ends the stream.
    result_mock.fetchmany.side_effect = [[(1, 12.34), (2, 56.78)], []]

    _setup_sqllab_mocks(mocker, mock_query)

    conn_mock = MagicMock()
    conn_mock.execution_options.return_value.execute.return_value = result_mock
    conn_mock.__enter__.return_value = conn_mock
    conn_mock.__exit__.return_value = None

    engine_mock = MagicMock()
    engine_mock.connect.return_value = conn_mock
    mock_query.database.get_sqla_engine.return_value.__enter__.return_value = (
        engine_mock
    )

    # Pretend the app was configured with a comma decimal separator.
    mocker.patch(
        "superset.commands.streaming_export.base.app.config.get",
        return_value={"sep": ";", "decimal": ",", "encoding": "utf-8"},
    )

    command = StreamingSqlResultExportCommand("test_client_123")
    command.validate()

    stream_factory = command.run()
    output = "".join(stream_factory())

    # With decimal=",", float values are rendered with a comma.
    assert "12,34" in output
    assert "56,78" in output


def test_csv_export_config_combined_sep_and_decimal(mocker, mock_query):
    """
    Test that streaming CSV export respects both sep and decimal from CSV_EXPORT.

    This is a regression test for GitHub issue #32371.
    """
    mock_query.select_sql = "SELECT * FROM test"

    mock_result = MagicMock()
    mock_result.keys.return_value = ["id", "name", "price"]
    mock_result.fetchmany.side_effect = [
        [(1, "Widget", 99.99), (2, "Gadget", 149.50)],
        [],
    ]

    mock_db, mock_session = _setup_sqllab_mocks(mocker, mock_query)

    mock_connection = MagicMock()
    mock_connection.execution_options.return_value.execute.return_value = mock_result
    mock_connection.__enter__.return_value = mock_connection
    mock_connection.__exit__.return_value = None

    mock_engine = MagicMock()
    mock_engine.connect.return_value = mock_connection
    mock_query.database.get_sqla_engine.return_value.__enter__.return_value = (
        mock_engine
    )

    # Mock the app config to use European format
    mocker.patch(
        "superset.commands.streaming_export.base.app.config.get",
        return_value={"sep": ";", "decimal": ",", "encoding": "utf-8"},
    )

    command = StreamingSqlResultExportCommand("test_client_123")
    command.validate()

    csv_generator_callable = command.run()
    generator = csv_generator_callable()
    csv_data = "".join(generator)

    # Verify header uses semicolon separator
    assert "id;name;price" in csv_data
    # Verify data uses semicolon separator and comma decimal.
    # str(149.5) renders as "149.5", so the comma form is "149,5"; the
    # previous `or "2;Gadget;149,50"` branch was dead code, since
    # "149,5" is a substring of "149,50" and the first operand always
    # decided the outcome.
    assert "1;Widget;99,99" in csv_data
    assert "2;Gadget;149,5" in csv_data
Loading