Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 13 additions & 8 deletions sqlmesh/cli/project_init.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from sqlmesh.utils.date import yesterday_ds
from sqlmesh.utils.errors import SQLMeshError

from sqlmesh.core.config.common import DBT_PROJECT_FILENAME
from sqlmesh.core.config.connection import (
CONNECTION_CONFIG_TO_TYPE,
DIALECT_TO_TYPE,
Expand Down Expand Up @@ -113,11 +114,10 @@ def _gen_config(
- ambiguousorinvalidcolumn
- invalidselectstarexpansion
""",
ProjectTemplate.DBT: """from pathlib import Path

from sqlmesh.dbt.loader import sqlmesh_config

config = sqlmesh_config(Path(__file__).parent)
ProjectTemplate.DBT: f"""# --- Model Defaults ---
# https://sqlmesh.readthedocs.io/en/stable/reference/model_configuration/#model-defaults
model_defaults:
start: {start or yesterday_ds()}
""",
}

Expand Down Expand Up @@ -285,8 +285,13 @@ def init_example_project(
cli_mode: InitCliMode = InitCliMode.DEFAULT,
) -> Path:
root_path = Path(path)
config_extension = "py" if template == ProjectTemplate.DBT else "yaml"
config_path = root_path / f"config.{config_extension}"

config_path = root_path / "config.yaml"
if template == ProjectTemplate.DBT:
# name the config file `sqlmesh.yaml` to make it clear that, among all the existing yaml files
# in a dbt project, this one specifically relates to configuring the sqlmesh engine
config_path = root_path / "sqlmesh.yaml"

audits_path = root_path / "audits"
macros_path = root_path / "macros"
models_path = root_path / "models"
Expand All @@ -298,7 +303,7 @@ def init_example_project(
f"Found an existing config file '{config_path}'.\n\nPlease change to another directory or remove the existing file."
)

if template == ProjectTemplate.DBT and not Path(root_path, "dbt_project.yml").exists():
if template == ProjectTemplate.DBT and not Path(root_path, DBT_PROJECT_FILENAME).exists():
raise SQLMeshError(
"Required dbt project file 'dbt_project.yml' not found in the current directory.\n\nPlease add it or change directories before running `sqlmesh init` to set up your project."
)
Expand Down
10 changes: 10 additions & 0 deletions sqlmesh/core/config/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,16 @@
from sqlmesh.utils.errors import ConfigError
from sqlmesh.utils.pydantic import field_validator

# Config files that can be present in the project dir
ALL_CONFIG_FILENAMES = ("config.py", "config.yml", "config.yaml", "sqlmesh.yml", "sqlmesh.yaml")

# For personal paths (~/.sqlmesh/) where python config is not supported
YAML_CONFIG_FILENAMES = tuple(n for n in ALL_CONFIG_FILENAMES if not n.endswith(".py"))

# Note: this is defined here to avoid having to import from sqlmesh.dbt.loader, which would
# introduce a dependency on dbt-core in a native project
DBT_PROJECT_FILENAME = "dbt_project.yml"


class EnvironmentSuffixTarget(str, Enum):
# Intended to create virtual environments in their own schemas, with names like "<model_schema_name>__<env name>". The view name is untouched.
Expand Down
29 changes: 24 additions & 5 deletions sqlmesh/core/config/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,11 @@
from sqlglot.helper import ensure_list

from sqlmesh.core import constants as c
from sqlmesh.core.config.common import (
ALL_CONFIG_FILENAMES,
YAML_CONFIG_FILENAMES,
DBT_PROJECT_FILENAME,
)
from sqlmesh.core.config.model import ModelDefaultsConfig
from sqlmesh.core.config.root import Config
from sqlmesh.utils import env_vars, merge_dicts, sys_path
Expand Down Expand Up @@ -51,10 +56,7 @@ def load_configs(
return {path: config for path in absolute_paths}

config_env_vars = None
personal_paths = [
sqlmesh_path / "config.yml",
sqlmesh_path / "config.yaml",
]
personal_paths = [sqlmesh_path / name for name in YAML_CONFIG_FILENAMES]
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I noticed that python config wasn't previously allowed in personal paths, so that's why there is both YAML_CONFIG_FILENAMES and ALL_CONFIG_FILENAMES

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is a reason for this. We can't correctly merge configs if the one in the home folder is a python config.

for path in personal_paths:
if path.exists():
config_env_vars = load_config_from_yaml(path).get("env_vars")
Expand All @@ -65,7 +67,7 @@ def load_configs(
return {
path: load_config_from_paths(
config_type,
project_paths=[path / "config.py", path / "config.yml", path / "config.yaml"],
project_paths=[path / name for name in ALL_CONFIG_FILENAMES],
personal_paths=personal_paths,
config_name=config,
)
Expand Down Expand Up @@ -156,6 +158,22 @@ def load_config_from_paths(
)

no_dialect_err_msg = "Default model SQL dialect is a required configuration parameter. Set it in the `model_defaults` `dialect` key in your config file."

# if "dbt_project.yml" is present *and there was no python config already defined*,
# create a basic one to ensure we are using the DBT loader.
# any config within yaml files will get overlaid on top of it.
if not python_config:
potential_project_files = [f / DBT_PROJECT_FILENAME for f in visited_folders]
dbt_project_file = next((f for f in potential_project_files if f.exists()), None)
if dbt_project_file:
from sqlmesh.dbt.loader import sqlmesh_config

dbt_python_config = sqlmesh_config(project_root=dbt_project_file.parent)
if type(dbt_python_config) != config_type:
dbt_python_config = convert_config_type(dbt_python_config, config_type)

python_config = dbt_python_config

if python_config:
model_defaults = python_config.model_defaults
if model_defaults.dialect is None:
Expand All @@ -165,6 +183,7 @@ def load_config_from_paths(
model_defaults = non_python_config.model_defaults
if model_defaults.dialect is None:
raise ConfigError(no_dialect_err_msg)

return non_python_config


Expand Down
3 changes: 2 additions & 1 deletion sqlmesh/dbt/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,11 @@
from sqlmesh.utils.jinja import MacroReference
from sqlmesh.utils.pydantic import PydanticModel, field_validator
from sqlmesh.utils.yaml import load
from sqlmesh.core.config.common import DBT_PROJECT_FILENAME

T = t.TypeVar("T", bound="GeneralConfig")

PROJECT_FILENAME = "dbt_project.yml"
PROJECT_FILENAME = DBT_PROJECT_FILENAME

JINJA_ONLY = {
"adapter",
Expand Down
10 changes: 8 additions & 2 deletions sqlmesh/dbt/manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
from dbt.tracking import do_not_track

from sqlmesh.core import constants as c
from sqlmesh.core.config import ModelDefaultsConfig
from sqlmesh.dbt.basemodel import Dependencies
from sqlmesh.dbt.builtin import BUILTIN_FILTERS, BUILTIN_GLOBALS, OVERRIDDEN_MACROS
from sqlmesh.dbt.model import ModelConfig
Expand Down Expand Up @@ -78,12 +79,14 @@ def __init__(
target: TargetConfig,
variable_overrides: t.Optional[t.Dict[str, t.Any]] = None,
cache_dir: t.Optional[str] = None,
model_defaults: t.Optional[ModelDefaultsConfig] = None,
):
self.project_path = project_path
self.profiles_path = profiles_path
self.profile_name = profile_name
self.target = target
self.variable_overrides = variable_overrides or {}
self.model_defaults = model_defaults or ModelDefaultsConfig()

self.__manifest: t.Optional[Manifest] = None
self._project_name: str = ""
Expand Down Expand Up @@ -380,9 +383,12 @@ def _load_manifest(self) -> Manifest:
profile = self._load_profile()
project = self._load_project(profile)

if not any(k in project.models for k in ("start", "+start")):
if (
not any(k in project.models for k in ("start", "+start"))
and not self.model_defaults.start
):
raise ConfigError(
"SQLMesh's requires a start date in order to have a finite range of backfilling data. Add start to the 'models:' block in dbt_project.yml. https://sqlmesh.readthedocs.io/en/stable/integrations/dbt/#setting-model-backfill-start-dates"
"SQLMesh requires a start date in order to have a finite range of backfilling data. Add start to the 'models:' block in dbt_project.yml. https://sqlmesh.readthedocs.io/en/stable/integrations/dbt/#setting-model-backfill-start-dates"
)

runtime_config = RuntimeConfig.from_parts(project, profile, args)
Expand Down
2 changes: 1 addition & 1 deletion sqlmesh/dbt/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -569,7 +569,7 @@ def to_sqlmesh(
query,
dialect=model_dialect,
kind=kind,
start=self.start,
start=self.start or context.sqlmesh_config.model_defaults.start,
audit_definitions=audit_definitions,
path=model_kwargs.pop("path", self.path),
# This ensures that we bypass query rendering that would otherwise be required to extract additional
Expand Down
1 change: 1 addition & 0 deletions sqlmesh/dbt/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ def load(cls, context: DbtContext, variables: t.Optional[t.Dict[str, t.Any]] = N
target=profile.target,
variable_overrides=variable_overrides,
cache_dir=context.sqlmesh_config.cache_dir,
model_defaults=context.sqlmesh_config.model_defaults,
)

extra_fields = profile.target.extra
Expand Down
59 changes: 17 additions & 42 deletions sqlmesh_dbt/operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ def create(

from sqlmesh import configure_logging
from sqlmesh.core.context import Context
from sqlmesh.dbt.loader import sqlmesh_config, DbtLoader
from sqlmesh.dbt.loader import DbtLoader
from sqlmesh.core.console import set_console
from sqlmesh_dbt.console import DbtCliConsole
from sqlmesh.utils.errors import SQLMeshError
Expand All @@ -71,34 +71,14 @@ def create(

progress.update(load_task_id, description="Loading project", total=None)

# inject default start date if one is not specified to prevent the user from having to do anything
_inject_default_start_date(project_dir)

config = sqlmesh_config(
project_root=project_dir,
# do we want to use a local duckdb for state?
# warehouse state has a bunch of overhead to initialize, is slow for ongoing operations and will create tables that perhaps the user was not expecting
# on the other hand, local state is not portable
state_connection=None,
)
project_dir = project_dir or Path.cwd()
init_project_if_required(project_dir)

sqlmesh_context = Context(
config=config,
paths=[project_dir],
load=True,
)

# this helps things which want a default project-level start date, like the "effective from date" for forward-only plans
if not sqlmesh_context.config.model_defaults.start:
min_start_date = min(
(
model.start
for model in sqlmesh_context.models.values()
if model.start is not None
),
default=None,
)
sqlmesh_context.config.model_defaults.start = min_start_date

dbt_loader = sqlmesh_context._loaders[0]
if not isinstance(dbt_loader, DbtLoader):
raise SQLMeshError(f"Unexpected loader type: {type(dbt_loader)}")
Expand All @@ -109,25 +89,20 @@ def create(
return DbtOperations(sqlmesh_context, dbt_project)


def _inject_default_start_date(project_dir: t.Optional[Path] = None) -> None:
def init_project_if_required(project_dir: Path) -> None:
"""
SQLMesh needs a start date to as the starting point for calculating intervals on incremental models
SQLMesh needs a start date to use as the starting point for calculating intervals on incremental models, amongst other things

Rather than forcing the user to update their config manually or having a default that is not saved between runs,
we can inject it automatically to the dbt_project.yml file
we can generate a basic SQLMesh config if it doesn't exist.

This is preferable to trying to inject config into `dbt_project.yml` because it means we have full control over the file
and don't need to worry about accidentally reformatting it or accidentally clobbering other config
"""
from sqlmesh.dbt.project import PROJECT_FILENAME, load_yaml
from sqlmesh.utils.yaml import dump
from sqlmesh.utils.date import yesterday_ds

project_yaml_path = (project_dir or Path.cwd()) / PROJECT_FILENAME
if project_yaml_path.exists():
loaded_project_file = load_yaml(project_yaml_path)
start_date_keys = ("start", "+start")
if "models" in loaded_project_file and all(
k not in loaded_project_file["models"] for k in start_date_keys
):
loaded_project_file["models"]["+start"] = yesterday_ds()
# todo: this may format the file differently, is that acceptable?
with project_yaml_path.open("w") as f:
dump(loaded_project_file, f)
from sqlmesh.cli.project_init import init_example_project, ProjectTemplate
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shouldn't all these imports be under the check of the file existence? Otherwise doesn't this defeat the purpose?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How do you mean? The sqlmesh module has already been imported by the time init_project_if_required() is called, so these imports are "free"

from sqlmesh.core.config.common import ALL_CONFIG_FILENAMES
from sqlmesh.core.console import get_console

if not any(f.exists() for f in [project_dir / file for file in ALL_CONFIG_FILENAMES]):
get_console().log_warning("No existing SQLMesh config detected; creating one")
init_example_project(path=project_dir, engine_type=None, template=ProjectTemplate.DBT)
16 changes: 4 additions & 12 deletions tests/cli/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -1954,21 +1954,13 @@ def test_init_dbt_template(runner: CliRunner, tmp_path: Path):
)
assert result.exit_code == 0

config_path = tmp_path / "config.py"
config_path = tmp_path / "sqlmesh.yaml"
assert config_path.exists()

with open(config_path) as file:
config = file.read()

assert (
config
== """from pathlib import Path
config = config_path.read_text()

from sqlmesh.dbt.loader import sqlmesh_config

config = sqlmesh_config(Path(__file__).parent)
"""
)
assert "model_defaults" in config
assert "start:" in config


@time_machine.travel(FREEZE_TIME)
Expand Down
24 changes: 24 additions & 0 deletions tests/cli/test_project_init.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import pytest
from pathlib import Path
from sqlmesh.utils.errors import SQLMeshError
from sqlmesh.cli.project_init import init_example_project, ProjectTemplate
from sqlmesh.utils import yaml


def test_project_init_dbt(tmp_path: Path):
    """Initialising the dbt template requires `dbt_project.yml` and writes only `sqlmesh.yaml`.

    First checks that init fails with a SQLMeshError while the directory has no dbt project
    file, then adds one and checks that the only files present afterwards are the dbt project
    file and a `sqlmesh.yaml` that mentions `model_defaults` and a `start: ` entry.
    """
    # the temporary directory must start out empty
    assert not list(tmp_path.glob("**/*"))

    dbt_template = ProjectTemplate.DBT

    # no dbt_project.yml yet, so initialisation is expected to be rejected
    with pytest.raises(SQLMeshError, match=r"Required dbt project file.*not found"):
        init_example_project(path=tmp_path, engine_type=None, template=dbt_template)

    dbt_project_path = tmp_path / "dbt_project.yml"
    with dbt_project_path.open("w") as handle:
        yaml.dump({"name": "jaffle_shop"}, handle)

    init_example_project(path=tmp_path, engine_type=None, template=dbt_template)
    created = [entry for entry in tmp_path.glob("**/*") if entry.is_file()]

    assert {entry.name for entry in created} == {"sqlmesh.yaml", "dbt_project.yml"}

    config_file = next(entry for entry in created if entry.name == "sqlmesh.yaml")
    config_text = config_file.read_text()
    assert "model_defaults" in config_text
    assert "start: " in config_text
Loading
Loading