Skip to content

Commit 6162bf4

Browse files
dpgaspar and claude
committed
fix(examples): set dataset UUID from YAML in generic loader
The generic data loader creates SqlaTable rows without the UUID from the YAML config. When load_examples_from_configs() later tries to import the same dataset via YAML, it looks up by UUID, misses the existing row, and tries to INSERT a duplicate — hitting a UNIQUE constraint on table_name.

Fix by reading the UUID from dataset YAML configs and setting it on the SqlaTable during generic data loading, so the YAML import path finds the existing dataset by UUID.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent eb1573c commit 6162bf4

File tree

2 files changed

+12
-0
lines changed

2 files changed

+12
-0
lines changed

superset/examples/data_loading.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ def get_dataset_config_from_yaml(example_dir: Path) -> Dict[str, Optional[str]]:
4040
"table_name": None,
4141
"schema": None,
4242
"data_file": None,
43+
"uuid": None,
4344
}
4445
dataset_yaml = example_dir / "dataset.yaml"
4546
if dataset_yaml.exists():
@@ -48,6 +49,7 @@ def get_dataset_config_from_yaml(example_dir: Path) -> Dict[str, Optional[str]]:
4849
config = yaml.safe_load(f)
4950
result["table_name"] = config.get("table_name")
5051
result["data_file"] = config.get("data_file")
52+
result["uuid"] = config.get("uuid")
5153
schema = config.get("schema")
5254
# Treat SQLite's 'main' schema as null (use target database default)
5355
result["schema"] = None if schema == "main" else schema
@@ -81,6 +83,7 @@ def _get_multi_dataset_config(
8183
with open(datasets_yaml) as f:
8284
yaml_config = yaml.safe_load(f)
8385
result["table_name"] = yaml_config.get("table_name") or dataset_name
86+
result["uuid"] = yaml_config.get("uuid")
8487
raw_schema = yaml_config.get("schema")
8588
result["schema"] = None if raw_schema == "main" else raw_schema
8689

@@ -142,6 +145,7 @@ def discover_datasets() -> Dict[str, Callable[..., None]]:
142145
table_name=table_name,
143146
schema=config["schema"],
144147
data_file=resolved_file,
148+
uuid=config.get("uuid"),
145149
)
146150

147151
# Discover multiple parquet files in data/ folders (complex examples)
@@ -160,6 +164,7 @@ def discover_datasets() -> Dict[str, Callable[..., None]]:
160164
table_name=config["table_name"],
161165
schema=config["schema"],
162166
data_file=config["data_file"],
167+
uuid=config.get("uuid"),
163168
)
164169

165170
return loaders

superset/examples/generic_loader.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ def load_parquet_table( # noqa: C901
5757
sample_rows: Optional[int] = None,
5858
data_file: Optional[Any] = None,
5959
schema: Optional[str] = None,
60+
uuid: Optional[str] = None,
6061
) -> SqlaTable:
6162
"""Load a Parquet file into the example database.
6263
@@ -175,6 +176,10 @@ def safe_serialize(x: Any, column_name: str) -> Optional[str]:
175176
# Set the database reference
176177
tbl.database = database
177178

179+
# Set UUID from YAML config so the YAML import path can find this dataset
180+
if uuid and not tbl.uuid:
181+
tbl.uuid = uuid
182+
178183
if not only_metadata:
179184
# Ensure database reference is set before fetching metadata
180185
if not tbl.database:
@@ -194,6 +199,7 @@ def create_generic_loader(
194199
sample_rows: Optional[int] = None,
195200
data_file: Optional[Any] = None,
196201
schema: Optional[str] = None,
202+
uuid: Optional[str] = None,
197203
) -> Callable[[Database, SqlaTable], None]:
198204
"""Create a loader function for a specific Parquet file.
199205
@@ -230,6 +236,7 @@ def loader(
230236
sample_rows=rows,
231237
data_file=data_file,
232238
schema=schema,
239+
uuid=uuid,
233240
)
234241

235242
if description and tbl:

0 commit comments

Comments
 (0)