Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions superset/commands/dashboard/export_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,10 @@ def export_dataset_yaml(
"schema": None, # Don't export - use target database's default schema
# Preserve SQL for virtual datasets, None for physical (data is in parquet)
"sql": dataset.sql if is_preserved_virtual else None,
# Track source database engine for SQL transpilation during import
"source_db_engine": (
dataset.database.db_engine_spec.engine if is_preserved_virtual else None
),
"params": None, # Don't export - contains stale import metadata
"template_params": dataset.template_params,
"filter_select_enabled": dataset.filter_select_enabled,
Expand Down
58 changes: 58 additions & 0 deletions superset/commands/importers/v1/examples.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,13 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import logging
from typing import Any, Optional

from marshmallow import Schema
from sqlalchemy.exc import MultipleResultsFound

from superset import db
from superset.charts.schemas import ImportV1ChartSchema
from superset.commands.chart.importers.v1 import ImportChartsCommand
from superset.commands.chart.importers.v1.utils import import_chart
Expand All @@ -41,9 +43,62 @@
from superset.dashboards.schemas import ImportV1DashboardSchema
from superset.databases.schemas import ImportV1DatabaseSchema
from superset.datasets.schemas import ImportV1DatasetSchema
from superset.exceptions import QueryClauseValidationException
from superset.models.core import Database
from superset.sql.parse import transpile_to_dialect
from superset.utils.core import get_example_default_schema
from superset.utils.decorators import transaction

logger = logging.getLogger(__name__)


def transpile_virtual_dataset_sql(config: dict[str, Any], database_id: int) -> None:
"""
Transpile virtual dataset SQL to the target database dialect.

This ensures that virtual datasets exported from one database type
(e.g., PostgreSQL) can be loaded into a different database type
(e.g., MySQL, DuckDB, SQLite).

Args:
config: Dataset configuration dict (modified in place)
database_id: ID of the target database
"""
sql = config.get("sql")
if not sql:
return

database = db.session.query(Database).get(database_id)
if not database:
logger.warning("Database %s not found, skipping SQL transpilation", database_id)
return

target_engine = database.db_engine_spec.engine
source_engine = config.get("source_db_engine")
if target_engine == source_engine:
logger.info("Source and target dialects are identical, skipping transpilation")
return

try:
transpiled_sql = transpile_to_dialect(sql, target_engine, source_engine)
if transpiled_sql != sql:
logger.info(
"Transpiled virtual dataset SQL for '%s' from %s to %s dialect",
config.get("table_name", "unknown"),
source_engine or "generic",
target_engine,
)
config["sql"] = transpiled_sql
except QueryClauseValidationException as ex:
logger.warning(
"Could not transpile SQL for dataset '%s' from %s to %s: %s. "
"Using original SQL which may not be compatible.",
config.get("table_name", "unknown"),
source_engine or "generic",
target_engine,
ex,
)


class ImportExamplesCommand(ImportModelsCommand):
"""Import examples"""
Expand Down Expand Up @@ -119,6 +174,9 @@ def _import( # pylint: disable=too-many-locals, too-many-branches # noqa: C901
if config["schema"] is None:
config["schema"] = get_example_default_schema()

# transpile virtual dataset SQL to target database dialect
transpile_virtual_dataset_sql(config, config["database_id"])

try:
dataset = import_dataset(
config,
Expand Down
2 changes: 2 additions & 0 deletions superset/datasets/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -322,6 +322,8 @@ def fix_extra(self, data: dict[str, Any], **kwargs: Any) -> dict[str, Any]:
schema = fields.String(allow_none=True)
catalog = fields.String(allow_none=True)
sql = fields.String(allow_none=True)
# Source database engine for SQL transpilation (virtual datasets only)
source_db_engine = fields.String(allow_none=True, load_default=None)
params = fields.Dict(allow_none=True)
template_params = fields.Dict(allow_none=True)
filter_select_enabled = fields.Boolean()
Expand Down
21 changes: 18 additions & 3 deletions superset/sql/parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -1522,9 +1522,21 @@ def sanitize_clause(clause: str, engine: str) -> str:
raise QueryClauseValidationException(f"Invalid SQL clause: {clause}") from ex


def transpile_to_dialect(sql: str, target_engine: str) -> str:
def transpile_to_dialect(
sql: str,
target_engine: str,
source_engine: str | None = None,
) -> str:
"""
Transpile SQL from "generic SQL" to the target database dialect using SQLGlot.
Transpile SQL from one database dialect to another using SQLGlot.

Args:
sql: The SQL query to transpile
target_engine: The target database engine (e.g., "mysql", "postgresql")
source_engine: The source database engine. If None, uses generic SQL dialect.

Returns:
The transpiled SQL string

If the target engine is not in SQLGLOT_DIALECTS, returns the SQL as-is.
"""
Expand All @@ -1534,8 +1546,11 @@ def transpile_to_dialect(sql: str, target_engine: str) -> str:
if target_dialect is None:
return sql

# Get source dialect (default to generic if not specified)
source_dialect = SQLGLOT_DIALECTS.get(source_engine) if source_engine else Dialect

try:
parsed = sqlglot.parse_one(sql, dialect=Dialect)
parsed = sqlglot.parse_one(sql, dialect=source_dialect)
return Dialect.get_or_raise(target_dialect).generate(
parsed,
copy=True,
Expand Down
244 changes: 244 additions & 0 deletions tests/unit_tests/commands/importers/v1/examples_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,244 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Tests for the examples importer, specifically SQL transpilation."""

from unittest.mock import MagicMock, patch

from superset.commands.importers.v1.examples import transpile_virtual_dataset_sql


def test_transpile_virtual_dataset_sql_no_sql():
"""Test that configs without SQL are unchanged."""
config = {"table_name": "my_table", "sql": None}
transpile_virtual_dataset_sql(config, 1)
assert config["sql"] is None


def test_transpile_virtual_dataset_sql_empty_sql():
"""Test that configs with empty SQL are unchanged."""
config = {"table_name": "my_table", "sql": ""}
transpile_virtual_dataset_sql(config, 1)
assert config["sql"] == ""


@patch("superset.commands.importers.v1.examples.db")
def test_transpile_virtual_dataset_sql_database_not_found(mock_db):
"""Test graceful handling when database is not found."""
mock_db.session.query.return_value.get.return_value = None

config = {"table_name": "my_table", "sql": "SELECT * FROM foo"}
original_sql = config["sql"]

transpile_virtual_dataset_sql(config, 999)

# SQL should remain unchanged
assert config["sql"] == original_sql


@patch("superset.commands.importers.v1.examples.db")
@patch("superset.commands.importers.v1.examples.transpile_to_dialect")
def test_transpile_virtual_dataset_sql_success(mock_transpile, mock_db):
"""Test successful SQL transpilation with source engine."""
mock_database = MagicMock()
mock_database.db_engine_spec.engine = "mysql"
mock_db.session.query.return_value.get.return_value = mock_database

mock_transpile.return_value = "SELECT * FROM `foo`"

config = {
"table_name": "my_table",
"sql": "SELECT * FROM foo",
"source_db_engine": "postgresql",
}
transpile_virtual_dataset_sql(config, 1)

assert config["sql"] == "SELECT * FROM `foo`"
mock_transpile.assert_called_once_with("SELECT * FROM foo", "mysql", "postgresql")


@patch("superset.commands.importers.v1.examples.db")
@patch("superset.commands.importers.v1.examples.transpile_to_dialect")
def test_transpile_virtual_dataset_sql_no_source_engine(mock_transpile, mock_db):
"""Test transpilation when source_db_engine is not specified (legacy)."""
mock_database = MagicMock()
mock_database.db_engine_spec.engine = "mysql"
mock_db.session.query.return_value.get.return_value = mock_database

mock_transpile.return_value = "SELECT * FROM `foo`"

# No source_db_engine - should default to None (generic dialect)
config = {"table_name": "my_table", "sql": "SELECT * FROM foo"}
transpile_virtual_dataset_sql(config, 1)

assert config["sql"] == "SELECT * FROM `foo`"
mock_transpile.assert_called_once_with("SELECT * FROM foo", "mysql", None)


@patch("superset.commands.importers.v1.examples.db")
@patch("superset.commands.importers.v1.examples.transpile_to_dialect")
def test_transpile_virtual_dataset_sql_no_change(mock_transpile, mock_db):
"""Test when transpilation returns same SQL (no dialect differences)."""
mock_database = MagicMock()
mock_database.db_engine_spec.engine = "postgresql"
mock_db.session.query.return_value.get.return_value = mock_database

original_sql = "SELECT * FROM foo"
mock_transpile.return_value = original_sql

config = {
"table_name": "my_table",
"sql": original_sql,
"source_db_engine": "postgresql",
}
transpile_virtual_dataset_sql(config, 1)

assert config["sql"] == original_sql


@patch("superset.commands.importers.v1.examples.db")
@patch("superset.commands.importers.v1.examples.transpile_to_dialect")
def test_transpile_virtual_dataset_sql_error_fallback(mock_transpile, mock_db):
"""Test graceful fallback when transpilation fails."""
from superset.exceptions import QueryClauseValidationException

mock_database = MagicMock()
mock_database.db_engine_spec.engine = "mysql"
mock_db.session.query.return_value.get.return_value = mock_database

mock_transpile.side_effect = QueryClauseValidationException("Parse error")

original_sql = "SELECT SOME_POSTGRES_SPECIFIC_FUNCTION() FROM foo"
config = {
"table_name": "my_table",
"sql": original_sql,
"source_db_engine": "postgresql",
}

# Should not raise, should keep original SQL
transpile_virtual_dataset_sql(config, 1)
assert config["sql"] == original_sql


@patch("superset.commands.importers.v1.examples.db")
@patch("superset.commands.importers.v1.examples.transpile_to_dialect")
def test_transpile_virtual_dataset_sql_postgres_to_duckdb(mock_transpile, mock_db):
"""Test transpilation from PostgreSQL to DuckDB."""
mock_database = MagicMock()
mock_database.db_engine_spec.engine = "duckdb"
mock_db.session.query.return_value.get.return_value = mock_database

original_sql = """
SELECT DATE_TRUNC('month', created_at) AS month, COUNT(*) AS cnt
FROM orders WHERE status = 'completed' GROUP BY 1
"""
transpiled_sql = """
SELECT DATE_TRUNC('month', created_at) AS month, COUNT(*) AS cnt
FROM orders WHERE status = 'completed' GROUP BY 1
"""
mock_transpile.return_value = transpiled_sql

config = {
"table_name": "monthly_orders",
"sql": original_sql,
"source_db_engine": "postgresql",
}
transpile_virtual_dataset_sql(config, 1)

assert config["sql"] == transpiled_sql
mock_transpile.assert_called_once_with(original_sql, "duckdb", "postgresql")


@patch("superset.commands.importers.v1.examples.db")
@patch("superset.commands.importers.v1.examples.transpile_to_dialect")
def test_transpile_virtual_dataset_sql_postgres_to_clickhouse(mock_transpile, mock_db):
"""Test transpilation from PostgreSQL to ClickHouse.

ClickHouse has different syntax for date functions, so this tests
real dialect differences.
"""
mock_database = MagicMock()
mock_database.db_engine_spec.engine = "clickhouse"
mock_db.session.query.return_value.get.return_value = mock_database

# PostgreSQL syntax
original_sql = "SELECT DATE_TRUNC('month', created_at) AS month FROM orders"
# ClickHouse uses toStartOfMonth instead
transpiled_sql = "SELECT toStartOfMonth(created_at) AS month FROM orders"
mock_transpile.return_value = transpiled_sql

config = {
"table_name": "monthly_orders",
"sql": original_sql,
"source_db_engine": "postgresql",
}
transpile_virtual_dataset_sql(config, 1)

assert config["sql"] == transpiled_sql
mock_transpile.assert_called_once_with(original_sql, "clickhouse", "postgresql")


@patch("superset.commands.importers.v1.examples.db")
@patch("superset.commands.importers.v1.examples.transpile_to_dialect")
def test_transpile_virtual_dataset_sql_postgres_to_mysql(mock_transpile, mock_db):
"""Test transpilation from PostgreSQL to MySQL.

MySQL uses backticks for identifiers and has different casting syntax.
"""
mock_database = MagicMock()
mock_database.db_engine_spec.engine = "mysql"
mock_db.session.query.return_value.get.return_value = mock_database

# PostgreSQL syntax with :: casting
original_sql = "SELECT created_at::DATE AS date_only FROM orders"
# MySQL syntax with CAST
transpiled_sql = "SELECT CAST(created_at AS DATE) AS date_only FROM `orders`"
mock_transpile.return_value = transpiled_sql

config = {
"table_name": "orders_dates",
"sql": original_sql,
"source_db_engine": "postgresql",
}
transpile_virtual_dataset_sql(config, 1)

assert config["sql"] == transpiled_sql
mock_transpile.assert_called_once_with(original_sql, "mysql", "postgresql")


@patch("superset.commands.importers.v1.examples.db")
@patch("superset.commands.importers.v1.examples.transpile_to_dialect")
def test_transpile_virtual_dataset_sql_postgres_to_sqlite(mock_transpile, mock_db):
"""Test transpilation from PostgreSQL to SQLite."""
mock_database = MagicMock()
mock_database.db_engine_spec.engine = "sqlite"
mock_db.session.query.return_value.get.return_value = mock_database

original_sql = "SELECT * FROM orders WHERE created_at > NOW() - INTERVAL '7 days'"
transpiled_sql = (
"SELECT * FROM orders WHERE created_at > DATETIME('now', '-7 days')"
)
mock_transpile.return_value = transpiled_sql

config = {
"table_name": "recent_orders",
"sql": original_sql,
"source_db_engine": "postgresql",
}
transpile_virtual_dataset_sql(config, 1)

assert config["sql"] == transpiled_sql
mock_transpile.assert_called_once_with(original_sql, "sqlite", "postgresql")
Loading
Loading