-
Notifications
You must be signed in to change notification settings - Fork 348
Feat: yaml config for dbt projects #5156
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -61,7 +61,7 @@ def create( | |
|
|
||
| from sqlmesh import configure_logging | ||
| from sqlmesh.core.context import Context | ||
| from sqlmesh.dbt.loader import sqlmesh_config, DbtLoader | ||
| from sqlmesh.dbt.loader import DbtLoader | ||
| from sqlmesh.core.console import set_console | ||
| from sqlmesh_dbt.console import DbtCliConsole | ||
| from sqlmesh.utils.errors import SQLMeshError | ||
|
|
@@ -71,34 +71,14 @@ def create( | |
|
|
||
| progress.update(load_task_id, description="Loading project", total=None) | ||
|
|
||
| # inject default start date if one is not specified to prevent the user from having to do anything | ||
| _inject_default_start_date(project_dir) | ||
|
|
||
| config = sqlmesh_config( | ||
| project_root=project_dir, | ||
| # do we want to use a local duckdb for state? | ||
| # warehouse state has a bunch of overhead to initialize, is slow for ongoing operations and will create tables that perhaps the user was not expecting | ||
| # on the other hand, local state is not portable | ||
| state_connection=None, | ||
| ) | ||
| project_dir = project_dir or Path.cwd() | ||
| init_project_if_required(project_dir) | ||
|
|
||
| sqlmesh_context = Context( | ||
| config=config, | ||
| paths=[project_dir], | ||
| load=True, | ||
| ) | ||
|
|
||
| # this helps things which want a default project-level start date, like the "effective from date" for forward-only plans | ||
| if not sqlmesh_context.config.model_defaults.start: | ||
| min_start_date = min( | ||
| ( | ||
| model.start | ||
| for model in sqlmesh_context.models.values() | ||
| if model.start is not None | ||
| ), | ||
| default=None, | ||
| ) | ||
| sqlmesh_context.config.model_defaults.start = min_start_date | ||
|
|
||
| dbt_loader = sqlmesh_context._loaders[0] | ||
| if not isinstance(dbt_loader, DbtLoader): | ||
| raise SQLMeshError(f"Unexpected loader type: {type(dbt_loader)}") | ||
|
|
@@ -109,25 +89,20 @@ def create( | |
| return DbtOperations(sqlmesh_context, dbt_project) | ||
|
|
||
|
|
||
| def _inject_default_start_date(project_dir: t.Optional[Path] = None) -> None: | ||
| def init_project_if_required(project_dir: Path) -> None: | ||
| """ | ||
| SQLMesh needs a start date to as the starting point for calculating intervals on incremental models | ||
| SQLMesh needs a start date to as the starting point for calculating intervals on incremental models, amongst other things | ||
|
|
||
| Rather than forcing the user to update their config manually or having a default that is not saved between runs, | ||
| we can inject it automatically to the dbt_project.yml file | ||
| we can generate a basic SQLMesh config if it doesnt exist. | ||
|
|
||
| This is preferable to trying to inject config into `dbt_project.yml` because it means we have full control over the file | ||
| and dont need to worry about accidentally reformatting it or accidentally clobbering other config | ||
| """ | ||
| from sqlmesh.dbt.project import PROJECT_FILENAME, load_yaml | ||
| from sqlmesh.utils.yaml import dump | ||
| from sqlmesh.utils.date import yesterday_ds | ||
|
|
||
| project_yaml_path = (project_dir or Path.cwd()) / PROJECT_FILENAME | ||
| if project_yaml_path.exists(): | ||
| loaded_project_file = load_yaml(project_yaml_path) | ||
| start_date_keys = ("start", "+start") | ||
| if "models" in loaded_project_file and all( | ||
| k not in loaded_project_file["models"] for k in start_date_keys | ||
| ): | ||
| loaded_project_file["models"]["+start"] = yesterday_ds() | ||
| # todo: this may format the file differently, is that acceptable? | ||
| with project_yaml_path.open("w") as f: | ||
| dump(loaded_project_file, f) | ||
| from sqlmesh.cli.project_init import init_example_project, ProjectTemplate | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Shouldn't all these imports be under the check of the file existence? Otherwise doesn't this defeat the purpose?
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. How do you mean? The |
||
| from sqlmesh.core.config.common import ALL_CONFIG_FILENAMES | ||
| from sqlmesh.core.console import get_console | ||
|
|
||
| if not any(f.exists() for f in [project_dir / file for file in ALL_CONFIG_FILENAMES]): | ||
| get_console().log_warning("No existing SQLMesh config detected; creating one") | ||
| init_example_project(path=project_dir, engine_type=None, template=ProjectTemplate.DBT) | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,24 @@ | ||
| import pytest | ||
| from pathlib import Path | ||
| from sqlmesh.utils.errors import SQLMeshError | ||
| from sqlmesh.cli.project_init import init_example_project, ProjectTemplate | ||
| from sqlmesh.utils import yaml | ||
|
|
||
|
|
||
| def test_project_init_dbt(tmp_path: Path): | ||
| assert not len(list(tmp_path.glob("**/*"))) | ||
|
|
||
| with pytest.raises(SQLMeshError, match=r"Required dbt project file.*not found"): | ||
| init_example_project(path=tmp_path, engine_type=None, template=ProjectTemplate.DBT) | ||
|
|
||
| with (tmp_path / "dbt_project.yml").open("w") as f: | ||
| yaml.dump({"name": "jaffle_shop"}, f) | ||
|
|
||
| init_example_project(path=tmp_path, engine_type=None, template=ProjectTemplate.DBT) | ||
| files = [f for f in tmp_path.glob("**/*") if f.is_file()] | ||
|
|
||
| assert set([f.name for f in files]) == set(["sqlmesh.yaml", "dbt_project.yml"]) | ||
|
|
||
| sqlmesh_config = next(f for f in files if f.name == "sqlmesh.yaml") | ||
| assert "model_defaults" in sqlmesh_config.read_text() | ||
| assert "start: " in sqlmesh_config.read_text() |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I noticed that Python config wasn't previously allowed in personal paths, so that's why there is both
`YAML_CONFIG_FILENAMES` and `ALL_CONFIG_FILENAMES`.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There is a reason for this. We can't correctly merge configs if the one in the home folder is a python config.