93 changes: 88 additions & 5 deletions superset/commands/importers/v1/examples.py
@@ -39,6 +39,7 @@
from superset.commands.importers.v1.utils import (
safe_insert_dashboard_chart_relationships,
)
from superset.connectors.sqla.models import SqlaTable
from superset.daos.base import BaseDAO
from superset.dashboards.schemas import ImportV1DashboardSchema
from superset.databases.schemas import ImportV1DatabaseSchema
@@ -184,13 +185,48 @@ def _import( # pylint: disable=too-many-locals, too-many-branches # noqa: C901
force_data=force_data,
ignore_permissions=True,
)
logger.info(
"Dataset imported: %s (uuid=%s, id=%s)",
config.get("table_name"),
config.get("uuid"),
dataset.id,
)
except MultipleResultsFound:
# Multiple results can be found for datasets. There was a bug in
# load-examples that resulted in datasets being loaded with a NULL
# schema. Users could then add a new dataset with the same name in
# the correct schema, resulting in duplicates, since the uniqueness
# constraint was not enforced correctly in the application logic.
# See https://github.com/apache/superset/issues/16051.
#
# Still add to dataset_info so charts can be imported
logger.warning(
"MultipleResultsFound for dataset %s (uuid=%s, schema=%s)",
config.get("table_name"),
config.get("uuid"),
config.get("schema"),
)
dataset = (
db.session.query(SqlaTable)
.filter_by(uuid=config["uuid"])
.first()
)
if dataset:
logger.info(
"Recovered dataset via UUID lookup: %s (id=%s)",
dataset.table_name,
dataset.id,
)
dataset_info[str(dataset.uuid)] = {
"datasource_id": dataset.id,
"datasource_type": "table",
"datasource_name": dataset.table_name,
}
else:
logger.error(
"Failed to recover dataset %s - UUID lookup returned None",
config.get("table_name"),
)
continue

dataset_info[str(dataset.uuid)] = {
@@ -202,10 +238,24 @@ def _import( # pylint: disable=too-many-locals, too-many-branches # noqa: C901
# import charts
chart_ids: dict[str, int] = {}
for file_name, config in configs.items():
if (
file_name.startswith("charts/")
and config["dataset_uuid"] in dataset_info
):
if file_name.startswith("charts/"):
chart_name = config.get("slice_name", "unknown")
chart_uuid = config.get("uuid", "unknown")
logger.info(
"Importing chart: %s (uuid=%s, dataset_uuid=%s)",
chart_name,
chart_uuid,
config.get("dataset_uuid"),
)
if config["dataset_uuid"] not in dataset_info:
logger.warning(
"SKIP chart %s: dataset_uuid %s not in dataset_info "
"(available datasets: %d)",
chart_name,
config["dataset_uuid"],
len(dataset_info),
)
continue
# update datasource id, type, and name
config.update(dataset_info[config["dataset_uuid"]])
chart = import_chart(
@@ -214,14 +264,34 @@ def _import( # pylint: disable=too-many-locals, too-many-branches # noqa: C901
ignore_permissions=True,
)
chart_ids[str(chart.uuid)] = chart.id
logger.info(
"OK chart: %s (id=%s)",
chart_name,
chart.id,
)

# import dashboards
dashboard_chart_ids: list[tuple[int, int]] = []
for file_name, config in configs.items():
if file_name.startswith("dashboards/"):
dashboard_title = config.get("dashboard_title", "unknown")
dashboard_uuid = config.get("uuid", "unknown")
logger.info(
"Importing dashboard: %s (uuid=%s)",
dashboard_title,
dashboard_uuid,
)
try:
config = update_id_refs(config, chart_ids, dataset_info)
except KeyError:
except KeyError as ex:
logger.error(
"SKIP dashboard %s: KeyError %s "
"(charts imported: %d, datasets imported: %d)",
dashboard_title,
ex,
len(chart_ids),
len(dataset_info),
)
continue

dashboard = import_dashboard(
@@ -230,10 +300,23 @@ def _import( # pylint: disable=too-many-locals, too-many-branches # noqa: C901
ignore_permissions=True,
)
dashboard.published = True
logger.info(
"OK dashboard: %s (id=%s)",
dashboard_title,
dashboard.id,
)

for uuid in find_chart_uuids(config["position"]):
chart_id = chart_ids[uuid]
dashboard_chart_ids.append((dashboard.id, chart_id))

# set ref in the dashboard_slices table
safe_insert_dashboard_chart_relationships(dashboard_chart_ids)

# Log import summary
logger.info(
"Import complete: %d datasets, %d charts, %d dashboard-chart links",
len(dataset_info),
len(chart_ids),
len(dashboard_chart_ids),
)
31 changes: 28 additions & 3 deletions superset/examples/data_loading.py
@@ -24,6 +24,7 @@

# Import loaders that have custom logic (dashboards, CSS, etc.)
from superset.cli.test_loaders import load_big_data
from superset.utils.core import backend, get_example_default_schema

from .css_templates import load_css_templates

Expand All @@ -35,11 +36,12 @@


def get_dataset_config_from_yaml(example_dir: Path) -> Dict[str, Optional[str]]:
"""Read table_name, schema, and data_file from dataset.yaml if it exists."""
"""Read table_name, schema, data_file, and uuid from dataset.yaml if it exists."""
result: Dict[str, Optional[str]] = {
"table_name": None,
"schema": None,
"data_file": None,
"uuid": None,
}
dataset_yaml = example_dir / "dataset.yaml"
if dataset_yaml.exists():
@@ -48,6 +50,7 @@ def get_dataset_config_from_yaml(example_dir: Path) -> Dict[str, Optional[str]]:
config = yaml.safe_load(f)
result["table_name"] = config.get("table_name")
result["data_file"] = config.get("data_file")
result["uuid"] = config.get("uuid")
schema = config.get("schema")
# Treat SQLite's 'main' schema as null (use target database default)
result["schema"] = None if schema == "main" else schema
@@ -72,6 +75,7 @@ def _get_multi_dataset_config(
"table_name": dataset_name,
"schema": None,
"data_file": data_file,
"uuid": None,
}

if not datasets_yaml.exists():
@@ -81,6 +85,7 @@ def _get_multi_dataset_config(
with open(datasets_yaml) as f:
yaml_config = yaml.safe_load(f)
result["table_name"] = yaml_config.get("table_name") or dataset_name
result["uuid"] = yaml_config.get("uuid")
raw_schema = yaml_config.get("schema")
result["schema"] = None if raw_schema == "main" else raw_schema

@@ -101,6 +106,23 @@ def _get_multi_dataset_config(
return result


def _get_effective_schema(config_schema: Optional[str]) -> Optional[str]:
"""Get effective schema for data loading, matching import flow behavior.

Some databases (SQLite) don't support real schemas - their 'main' is just
an attachment name, not a schema like PostgreSQL's 'public'. For these
backends, we return None to avoid schema-related SQL errors.
"""
# Backends that don't support real schemas
no_schema_backends = {"sqlite"}

if config_schema:
return config_schema
if backend() in no_schema_backends:
return None
return get_example_default_schema()


def discover_datasets() -> Dict[str, Callable[..., None]]:
"""Auto-discover all example datasets and create loaders for them.

@@ -140,8 +162,9 @@ def discover_datasets() -> Dict[str, Callable[..., None]]:
loaders[loader_name] = create_generic_loader(
dataset_name,
table_name=table_name,
schema=config["schema"],
schema=_get_effective_schema(config["schema"]),
data_file=resolved_file,
uuid=config.get("uuid"),
)

# Discover multiple parquet files in data/ folders (complex examples)
@@ -153,13 +176,15 @@ def discover_datasets() -> Dict[str, Callable[..., None]]:
continue

config = _get_multi_dataset_config(example_dir, dataset_name, data_file)

loader_name = f"load_{dataset_name}"
if loader_name not in loaders:
loaders[loader_name] = create_generic_loader(
dataset_name,
table_name=config["table_name"],
schema=config["schema"],
schema=_get_effective_schema(config["schema"]),
data_file=config["data_file"],
uuid=config.get("uuid"),
)

return loaders