-
Notifications
You must be signed in to change notification settings - Fork 14.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
fix(mysql): handle string typed decimal results #24241
Changes from 3 commits
14b6155
f5a2f73
1249654
261baf4
12b7452
0f1fd48
9659991
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|
|
@@ -314,6 +314,10 @@ class BaseEngineSpec: # pylint: disable=too-many-public-methods | |||||||||||
# engine-specific type mappings to check prior to the defaults | ||||||||||||
column_type_mappings: Tuple[ColumnTypeMapping, ...] = () | ||||||||||||
|
||||||||||||
# type-specific functions to mutate values received from the database. | ||||||||||||
# Needed on certain databases that return values in an unexpected format | ||||||||||||
column_type_mutators: dict[TypeEngine, Callable[[Any], Any]] = {} | ||||||||||||
|
||||||||||||
# Does database support join-free timeslot grouping | ||||||||||||
time_groupby_inline = False | ||||||||||||
limit_method = LimitMethod.FORCE_LIMIT | ||||||||||||
|
@@ -737,7 +741,27 @@ def fetch_data( | |||||||||||
try: | ||||||||||||
if cls.limit_method == LimitMethod.FETCH_MANY and limit: | ||||||||||||
return cursor.fetchmany(limit) | ||||||||||||
return cursor.fetchall() | ||||||||||||
data = cursor.fetchall() | ||||||||||||
description = cursor.description or [] | ||||||||||||
column_type_mutators = { | ||||||||||||
row[0]: func | ||||||||||||
for row in description | ||||||||||||
if ( | ||||||||||||
func := cls.column_type_mutators.get( | ||||||||||||
type(cls.get_sqla_column_type(cls.get_datatype(row[1]))) | ||||||||||||
) | ||||||||||||
) | ||||||||||||
} | ||||||||||||
if column_type_mutators: | ||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Typically decimal is a fixed point data structure but native From the screenshot before and after, I saw the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good catch about the Wrt to the query performance hit, another option is to make the frontend more resilient to receiving various types. In this case we could cast the value to I'd be curious to hear other peoples thoughts, @michael-s-molina @john-bodley any thoughts here? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It hit me, that without this change, the linked issue won't be fixed, as CSV export happens purely in the backend. With this new pattern we could even normalize non-standard timestamps early in the flow, eliminating the need for doing it in the frontend, further decoupling the quirks of the analytical databases from the downstream logic. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Btw, I just noticed we already have logic like this in the codebase: superset/superset/db_engine_specs/ocient.py Lines 330 to 334 in 50535e4
|
||||||||||||
indexes = {row[0]: idx for idx, row in enumerate(description)} | ||||||||||||
for row_idx, row in enumerate(data): | ||||||||||||
new_row = list(row) | ||||||||||||
for col, func in column_type_mutators.items(): | ||||||||||||
col_idx = indexes[col] | ||||||||||||
new_row[col_idx] = func(row[col_idx]) | ||||||||||||
data[row_idx] = tuple(new_row) | ||||||||||||
|
||||||||||||
return data | ||||||||||||
except Exception as ex: | ||||||||||||
raise cls.get_dbapi_mapped_exception(ex) from ex | ||||||||||||
|
||||||||||||
|
Original file line number | Diff line number | Diff line change | ||||||
---|---|---|---|---|---|---|---|---|
|
@@ -15,9 +15,12 @@ | |||||||
# specific language governing permissions and limitations | ||||||||
# under the License. | ||||||||
# pylint: disable=unused-argument, import-outside-toplevel, protected-access | ||||||||
from __future__ import annotations | ||||||||
|
||||||||
import json | ||||||||
from datetime import datetime | ||||||||
from typing import Any, Dict, Optional, Type | ||||||||
from decimal import Decimal | ||||||||
from typing import Any, Dict, Optional, Type, Union | ||||||||
from unittest.mock import Mock, patch | ||||||||
|
||||||||
import pandas as pd | ||||||||
|
@@ -366,3 +369,37 @@ def test_handle_cursor_early_cancel( | |||||||
assert cancel_query_mock.call_args[1]["cancel_query_id"] == query_id | ||||||||
else: | ||||||||
assert cancel_query_mock.call_args is None | ||||||||
|
||||||||
|
||||||||
@pytest.mark.parametrize( | ||||||||
"data,description,expected_result", | ||||||||
[ | ||||||||
( | ||||||||
[["1.23456", "abc"]], | ||||||||
[("dec", "decimal(12,6)"), ("str", "varchar(3)")], | ||||||||
[(Decimal("1.23456"), "abc")], | ||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Notice how we're changing from superset/superset/result_set.py Lines 132 to 134 in 50535e4
|
||||||||
), | ||||||||
( | ||||||||
[[Decimal("1.23456"), "abc"]], | ||||||||
[("dec", "decimal(12,6)"), ("str", "varchar(3)")], | ||||||||
[(Decimal("1.23456"), "abc")], | ||||||||
), | ||||||||
( | ||||||||
[["1.23456", "abc"]], | ||||||||
[("dec", "varchar(255)"), ("str", "varchar(3)")], | ||||||||
[["1.23456", "abc"]], | ||||||||
), | ||||||||
], | ||||||||
) | ||||||||
def test_column_type_mutator( | ||||||||
data: list[Union[tuple[Any, ...], list[Any]]], | ||||||||
description: list[Any], | ||||||||
expected_result: list[Union[tuple[Any, ...], list[Any]]], | ||||||||
): | ||||||||
from superset.db_engine_specs.trino import TrinoEngineSpec as spec | ||||||||
|
||||||||
mock_cursor = Mock() | ||||||||
mock_cursor.fetchall.return_value = data | ||||||||
mock_cursor.description = description | ||||||||
|
||||||||
assert spec.fetch_data(mock_cursor) == expected_result |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we add a comment that explains the logic here? This dict comprehension might be hard to understand without the context of this PR