From d2e7ef96259053729a1f6ea4b55800eded41467f Mon Sep 17 00:00:00 2001
From: Florian Hines
Date: Thu, 20 Oct 2022 13:15:11 -0500
Subject: [PATCH] feat: edk based dbt extension (#2)

Co-authored-by: Pat Nadolny
---
 .flake8 | 7 +-
 .github/workflows/test.yml | 2 +-
 .pre-commit-config.yaml | 2 +
 README.md | 237 ++++++++++++++++++
 dbt_ext/extension.py | 116 ++++++++-
 files_dbt_ext/bundle/__init__.py | 0
 files_dbt_ext/bundle/transform/.gitignore | 3 +
 files_dbt_ext/bundle/transform/__init__.py | 0
 .../bundle/transform/dbt_project.yml | 26 ++
 .../bundle/transform/models/.gitkeep | 0
 files_dbt_ext/profiles/__init__.py | 0
 files_dbt_ext/profiles/bigquery/profiles.yml | 39 +++
 files_dbt_ext/profiles/duckdb/profiles.yml | 18 ++
 files_dbt_ext/profiles/postgres/profiles.yml | 49 ++++
 files_dbt_ext/profiles/redshift/profiles.yml | 63 +++++
 files_dbt_ext/profiles/snowflake/profiles.yml | 41 +++
 poetry.lock | 30 ++-
 pyproject.toml | 24 +-
 tests/test_dbt_ext.py | 39 ++-
 19 files changed, 674 insertions(+), 22 deletions(-)
 create mode 100644 files_dbt_ext/bundle/__init__.py
 create mode 100644 files_dbt_ext/bundle/transform/.gitignore
 create mode 100644 files_dbt_ext/bundle/transform/__init__.py
 create mode 100644 files_dbt_ext/bundle/transform/dbt_project.yml
 create mode 100644 files_dbt_ext/bundle/transform/models/.gitkeep
 create mode 100644 files_dbt_ext/profiles/__init__.py
 create mode 100644 files_dbt_ext/profiles/bigquery/profiles.yml
 create mode 100644 files_dbt_ext/profiles/duckdb/profiles.yml
 create mode 100644 files_dbt_ext/profiles/postgres/profiles.yml
 create mode 100644 files_dbt_ext/profiles/redshift/profiles.yml
 create mode 100644 files_dbt_ext/profiles/snowflake/profiles.yml

diff --git a/.flake8 b/.flake8
index 4edfea8..6a01ab2 100644
--- a/.flake8
+++ b/.flake8
@@ -1,7 +1,12 @@
 [flake8]
 ignore = W503, C901, ANN101
 max-line-length = 88
-exclude = files_db_ext
+exclude = files_dbt_ext
 max-complexity = 10
 docstring-convention = google
 allow-star-arg-any = true
+per-file-ignores =
+    # Don't require docstrings or type annotations in tests
+    tests/*:D100,D102,D103,D104,DAR,ANN
+    # missing params in docstring
+    main.py: DAR101
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 0202805..cbdebe9 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -32,4 +32,4 @@ jobs:
           poetry install
       - name: Test with pytest
         run: |
-          poetry run pytest
+          DBT_EXT_TYPE=postgres poetry run pytest
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 6a087f9..f802e79 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -36,9 +36,11 @@ repos:
           - darglint==1.8.1
           - flake8-annotations==2.9.0
           - flake8-docstrings==1.6.0
+        exclude: 'files_dbt_ext'
 - repo: https://github.com/asottile/pyupgrade
   rev: v3.1.0
   hooks:
     - id: pyupgrade
       args: [--py37-plus]
+      exclude: 'files_dbt_ext'
diff --git a/README.md b/README.md
index bb7c8b1..7e4f12f 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,240 @@
# dbt-ext

Meltano dbt utility extension

## Configuration

Note the new `DBT_EXT_TYPE` setting, which selects the dbt profile to use, and the new default executable of `dbt_invoker` instead of `dbt`.
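For example, the `type` setting surfaces to the extension as the `DBT_EXT_TYPE` environment variable, so the bundled profile scaffolding can also be driven by hand outside of Meltano (a minimal sketch; it assumes the extension's console scripts are on your PATH, e.g. inside the project's virtualenv, and mirrors the CI step above that runs `DBT_EXT_TYPE=postgres poetry run pytest`):

```bash
# Select the bundled postgres profile, then scaffold the transform/
# directory and matching profiles via the extension's own CLI entry point.
export DBT_EXT_TYPE=postgres
dbt_extension initialize
```

The plugin definitions below wire the same settings up through `meltano.yml`.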
### dbt-postgres

```yaml
plugins:
  utilities:
    - name: dbt-postgres
      label: dbt PostgreSQL extension
      executable: dbt_invoker
      namespace: dbt_ext
      pip_url: dbt-core~=1.1.0 dbt-postgres~=1.1.0 git+https://github.com/meltano/dbt-ext.git@feat/working-dbt-ext
      settings:
        - name: skip_pre_invoke
          kind: boolean
          value: false
          description: Whether to skip pre-invoke hooks which automatically run dbt clean and deps
          env: DBT_EXT_SKIP_PRE_INVOKE
        - name: type
          env: DBT_EXT_TYPE
          value: postgres
        - name: project_dir
          label: Projects Directory
          value: $MELTANO_PROJECT_ROOT/transform
        - name: profiles_dir
          label: Profiles Directory
          value: $MELTANO_PROJECT_ROOT/transform/profiles/postgres
          env: DBT_PROFILES_DIR
        # Postgres connection settings are set via `config:` blocks and mapped to `profiles.yml`
        - name: host
          label: Host
          kind: string
          description: |
            The postgres host to connect to.
        - name: user
          label: User
          kind: string
          description: |
            The user to connect as.
        - name: password
          label: Password
          kind: password
          description: |
            The password to connect with.
        - name: port
          label: Port
          kind: integer
          description: |
            The port to connect to.
        - name: dbname
          label: Database
          aliases: ["database"]
          kind: string
          description: |
            The db to connect to.
        - name: schema
          label: Schema
          kind: string
          description: |
            The schema to use.
        - name: keepalives_idle
          label: Keep Alives Idle
          kind: integer
          description: |
            Seconds between TCP keepalive packets.
        - name: search_path
          label: Search Path
          kind: string
          description: |
            Overrides the default search path.
        - name: role
          label: Role
          kind: string
          description: |
            Role for dbt to assume when executing queries.
        - name: sslmode
          label: SSL Mode
          kind: array
          description: |
            SSL Mode used to connect to the database.
      commands:
        clean:
          args: clean
          description: Delete all folders in the clean-targets list (usually the dbt_modules and target directories).
        compile:
          args: compile
          description: Generates executable SQL from source model, test, and analysis files. Compiled SQL files are written to the target/ directory.
        deps:
          args: deps
          description: Pull the most recent version of the dependencies listed in packages.yml
        run:
          args: run
          description: Compile SQL and execute against the current target database.
        seed:
          args: seed
          description: Load data from csv files into your data warehouse.
        snapshot:
          args: snapshot
          description: Execute snapshots defined in your project.
        test:
          args: test
          description: Runs tests on data in deployed models.
        freshness:
          args: source freshness
          description: Check the freshness of your source data.
        build:
          args: build
          description: Will run your models, tests, snapshots and seeds in DAG order.
        docs-generate:
          args: docs generate
          description: Generate documentation for your project.
        docs-serve:
          args: docs serve
          description: Serve documentation for your project. Make sure you ran `docs-generate` first.
        debug:
          args: debug
          description: Debug your dbt project and warehouse connection.
        describe:
          args: describe
          executable: dbt_extension
        initialize:
          args: initialize
          executable: dbt_extension
```

### dbt-snowflake

```yaml
  - name: dbt-snowflake
    label: dbt Snowflake
    executable: dbt_invoker
    namespace: dbt_ext
    pip_url: dbt-core~=1.1.0 dbt-snowflake~=1.1.0 git+https://github.com/meltano/dbt-ext.git@feat/working-dbt-ext
    settings:
      - name: skip_pre_invoke
        kind: boolean
        value: false
        description: Whether to skip pre-invoke hooks which automatically run dbt clean and deps
        env: DBT_EXT_SKIP_PRE_INVOKE
      - name: type
        env: DBT_EXT_TYPE
        value: snowflake
      - name: project_dir
        label: Projects Directory
        value: $MELTANO_PROJECT_ROOT/transform
      - name: profiles_dir
        label: Profiles Directory
        env: DBT_PROFILES_DIR
        value: $MELTANO_PROJECT_ROOT/transform/profiles/snowflake
      - name: account
        label: Account
        kind: string
        description: The snowflake account to connect to.
      - name: user
        label: User
        kind: string
        description: The user to connect as.
      - name: password
        label: Password
        kind: password
        description: The user password to authenticate with.
      - name: role
        label: Role
        kind: string
        description: The user role to assume.
      - name: warehouse
        label: Warehouse
        kind: string
        description: The compute warehouse to use when building models.
      - name: database
        label: Database
        kind: string
        description: The database to create models in.
      - name: schema
        label: Schema
        kind: string
        description: The schema to build models into by default.
    commands:
      clean:
        args: clean
        description: Delete all folders in the clean-targets list (usually the dbt_modules and target directories).
      compile:
        args: compile
        description: Generates executable SQL from source model, test, and analysis files. Compiled SQL files are written to the target/ directory.
      deps:
        args: deps
        description: Pull the most recent version of the dependencies listed in packages.yml
      run:
        args: run
        description: Compile SQL and execute against the current target database.
      seed:
        args: seed
        description: Load data from csv files into your data warehouse.
      snapshot:
        args: snapshot
        description: Execute snapshots defined in your project.
      test:
        args: test
        description: Runs tests on data in deployed models.
      freshness:
        args: source freshness
        description: Check the freshness of your source data.
      build:
        args: build
        description: Will run your models, tests, snapshots and seeds in DAG order.
      docs-generate:
        args: docs generate
        description: Generate documentation for your project.
      docs-serve:
        args: docs serve
        description: Serve documentation for your project. Make sure you ran `docs-generate` first.
      debug:
        args: debug
        description: Debug your dbt project and warehouse connection.
      describe:
        args: describe
        executable: dbt_extension
      initialize:
        args: initialize
        executable: dbt_extension
```

## Installation

```bash
meltano install utility dbt-postgres
meltano invoke dbt-postgres:initialize
meltano invoke dbt-postgres list
meltano invoke dbt-postgres test
```
diff --git a/dbt_ext/extension.py b/dbt_ext/extension.py
index 61cd1eb..6d4984e 100644
--- a/dbt_ext/extension.py
+++ b/dbt_ext/extension.py
@@ -2,7 +2,7 @@
 from __future__ import annotations

 import os
-import pkgutil
+import shutil
 import subprocess
 import sys
 from pathlib import Path
@@ -13,16 +13,76 @@
 from meltano.edk.extension import ExtensionBase
 from meltano.edk.process import Invoker, log_subprocess_error

+try:
+    from importlib.resources import files as ir_files
+except ImportError:
+    from importlib_resources import files as ir_files
+
 log = structlog.get_logger()


+class MissingProfileTypeError(Exception):
+    """Missing profile type error."""
+
+    pass
+
+
 class dbt(ExtensionBase):
     """Extension implementing the ExtensionBase interface."""

     def __init__(self) -> None:
-        """Initialize the extension."""
-        self.dbt_bin = "dbt"  # verify this is the correct name
-        self.dbt_invoker = Invoker(self.dbt_bin)
+        """Initialize the extension.
+
+        Raises:
+            MissingProfileTypeError: If the profile type is not set.
+        """
+        self.dbt_bin = "dbt"
+        self.dbt_ext_type = os.getenv("DBT_EXT_TYPE", None)
+        if not self.dbt_ext_type:
+            raise MissingProfileTypeError("DBT_EXT_TYPE must be set")
+        self.dbt_project_dir = Path(os.getenv("DBT_EXT_PROJECT_DIR", "transform"))
+        self.dbt_profiles_dir = Path(
+            os.getenv("DBT_EXT_PROFILES_DIR", self.dbt_project_dir / "profiles")
+        )
+        self.dbt_invoker = Invoker(self.dbt_bin, cwd=self.dbt_project_dir)
+        self.skip_pre_invoke = (
+            os.getenv("DBT_EXT_SKIP_PRE_INVOKE", "false").lower() == "true"
+        )
+
+    def pre_invoke(self, invoke_name: str | None, *invoke_args: Any) -> None:
+        """Pre-invoke hook.
+
+        Runs `dbt clean` and `dbt deps` to ensure dependencies are up-to-date on every invocation.
+
+        Args:
+            invoke_name: The name of the command that will eventually be invoked.
+            invoke_args: The arguments that will be passed to the command.
+        """
+        if self.skip_pre_invoke:
+            log.debug("skipping pre-invoke as DBT_EXT_SKIP_PRE_INVOKE is set")
+            return
+
+        if invoke_name in ["deps", "clean"]:
+            log.debug("skipping pre-invoke as command being invoked is deps or clean")
+            return
+
+        try:
+            log.info("Extension executing `dbt clean`...")
+            self.dbt_invoker.run_and_log("clean")
+        except subprocess.CalledProcessError as err:
+            log_subprocess_error(
+                "dbt clean", err, "pre invoke step of `dbt clean` failed"
+            )
+            sys.exit(err.returncode)
+
+        try:
+            log.info("Extension executing `dbt deps`...")
+            self.dbt_invoker.run_and_log("deps")
+        except subprocess.CalledProcessError as err:
+            log_subprocess_error(
+                "dbt deps", err, "pre invoke step of `dbt deps` failed"
+            )
+            sys.exit(err.returncode)

     def invoke(self, command_name: str | None, *command_args: Any) -> None:
         """Invoke the underlying cli, that is being wrapped by this extension.
@@ -32,6 +92,10 @@ def invoke(self, command_name: str | None, *command_args: Any) -> None:
             command_args: The arguments to pass to the command.
""" try: + command_msg = command_name if command_name else self.dbt_bin + if len(command_args) > 0: + command_msg += f" {command_args[0][0]}" + log.info(f"Extension executing `{command_msg}`...") self.dbt_invoker.run_and_log(command_name, *command_args) except subprocess.CalledProcessError as err: log_subprocess_error(f"dbt {command_name}", err, "dbt invocation failed") @@ -54,3 +118,47 @@ def describe(self) -> models.Describe: ), ] ) + + def initialize(self, force: bool = False) -> None: + """Initialize the extension. + + Args: + force: Whether to force initialization. + """ + if not self.dbt_project_dir.exists(): + log.info("creating dbt project directory", path=self.dbt_project_dir) + self.dbt_project_dir.mkdir(parents=True, exist_ok=True) + + for entry in ir_files("files_dbt_ext.bundle.transform").iterdir(): + if entry.name == "__pycache__": + continue + log.debug(f"deploying {entry.name}", entry=entry, dst=self.dbt_project_dir) + if entry.is_file(): + shutil.copy(entry, self.dbt_project_dir / entry.name) + else: + shutil.copytree( + entry, self.dbt_project_dir / entry.name, dirs_exist_ok=True + ) + + if not self.dbt_profiles_dir.exists(): + log.info("creating dbt profiles directory", path=self.dbt_profiles_dir) + self.dbt_profiles_dir.mkdir(parents=True, exist_ok=True) + + for entry in ir_files("files_dbt_ext.profiles").iterdir(): + if entry.name == self.dbt_ext_type and entry.is_dir(): + log.debug( + f"deploying {entry.name} profile", + entry=entry, + dst=self.dbt_profiles_dir, + ) + shutil.copytree(entry, self.dbt_profiles_dir, dirs_exist_ok=True) + break + else: + log.error(f"dbt type {self.dbt_ext_type} had no matching profile.") + + log.info( + "dbt initialized", + dbt_ext_type=self.dbt_ext_type, + dbt_project_dir=self.dbt_project_dir, + dbt_profiles_dir=self.dbt_profiles_dir, + ) diff --git a/files_dbt_ext/bundle/__init__.py b/files_dbt_ext/bundle/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/files_dbt_ext/bundle/transform/.gitignore b/files_dbt_ext/bundle/transform/.gitignore new file mode 100644 index 0000000..087d062 --- /dev/null +++ b/files_dbt_ext/bundle/transform/.gitignore @@ -0,0 +1,3 @@ +target/ +dbt_packages/ +logs/ diff --git a/files_dbt_ext/bundle/transform/__init__.py b/files_dbt_ext/bundle/transform/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/files_dbt_ext/bundle/transform/dbt_project.yml b/files_dbt_ext/bundle/transform/dbt_project.yml new file mode 100644 index 0000000..cd7403d --- /dev/null +++ b/files_dbt_ext/bundle/transform/dbt_project.yml @@ -0,0 +1,26 @@ +name: my_meltano_project +version: '1.0' +profile: meltano +config-version: 2 +require-dbt-version: [">=1.0.0", "<2.0.0"] +model-paths: +- models +analysis-paths: +- analysis +test-paths: +- tests +seed-paths: +- data +macro-paths: +- macros +snapshot-paths: +- snapshots +target-path: ../.meltano/transformers/dbt/target +log-path: logs +packages-install-path: dbt_packages +clean-targets: +- ../.meltano/transformers/dbt/target +- dbt_packages +- logs +models: + my_meltano_project: null diff --git a/files_dbt_ext/bundle/transform/models/.gitkeep b/files_dbt_ext/bundle/transform/models/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/files_dbt_ext/profiles/__init__.py b/files_dbt_ext/profiles/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/files_dbt_ext/profiles/bigquery/profiles.yaml b/files_dbt_ext/profiles/bigquery/profiles.yaml new file mode 100644 index 0000000..ce1ecc1 --- /dev/null +++ 
+++ b/files_dbt_ext/profiles/bigquery/profiles.yml
@@ -0,0 +1,39 @@
+config:
+  send_anonymous_usage_stats: False
+  use_colors: True
+meltano:
+  target: "{{ env_var('MELTANO_ENVIRONMENT', 'dev') }}"
+  outputs:
+    dev:
+      type: bigquery
+      method: "{{ env_var('DBT_BIGQUERY_AUTH_METHOD') }}"
+      project: "{{ env_var('DBT_BIGQUERY_PROJECT') }}"
+      dataset: "{{ env_var('DBT_BIGQUERY_DATASET') }}"
+      threads: 2
+      refresh_token: "{{ env_var('DBT_BIGQUERY_REFRESH_TOKEN', '') }}"
+      client_id: "{{ env_var('DBT_BIGQUERY_CLIENT_ID', '') }}"
+      client_secret: "{{ env_var('DBT_BIGQUERY_CLIENT_SECRET', '') }}"
+      token_uri: "{{ env_var('DBT_BIGQUERY_TOKEN_URI', '') }}"
+      keyfile: "{{ env_var('DBT_BIGQUERY_KEYFILE', '') }}"
+    staging:
+      type: bigquery
+      method: "{{ env_var('DBT_BIGQUERY_AUTH_METHOD') }}"
+      project: "{{ env_var('DBT_BIGQUERY_PROJECT') }}"
+      dataset: "{{ env_var('DBT_BIGQUERY_DATASET') }}"
+      threads: 4
+      refresh_token: "{{ env_var('DBT_BIGQUERY_REFRESH_TOKEN', '') }}"
+      client_id: "{{ env_var('DBT_BIGQUERY_CLIENT_ID', '') }}"
+      client_secret: "{{ env_var('DBT_BIGQUERY_CLIENT_SECRET', '') }}"
+      token_uri: "{{ env_var('DBT_BIGQUERY_TOKEN_URI', '') }}"
+      keyfile: "{{ env_var('DBT_BIGQUERY_KEYFILE', '') }}"
+    prod:
+      type: bigquery
+      method: "{{ env_var('DBT_BIGQUERY_AUTH_METHOD') }}"
+      project: "{{ env_var('DBT_BIGQUERY_PROJECT') }}"
+      dataset: "{{ env_var('DBT_BIGQUERY_DATASET') }}"
+      threads: 6
+      refresh_token: "{{ env_var('DBT_BIGQUERY_REFRESH_TOKEN', '') }}"
+      client_id: "{{ env_var('DBT_BIGQUERY_CLIENT_ID', '') }}"
+      client_secret: "{{ env_var('DBT_BIGQUERY_CLIENT_SECRET', '') }}"
+      token_uri: "{{ env_var('DBT_BIGQUERY_TOKEN_URI', '') }}"
+      keyfile: "{{ env_var('DBT_BIGQUERY_KEYFILE', '') }}"
diff --git a/files_dbt_ext/profiles/duckdb/profiles.yml b/files_dbt_ext/profiles/duckdb/profiles.yml
new file mode 100644
index 0000000..1819f18
--- /dev/null
+++ b/files_dbt_ext/profiles/duckdb/profiles.yml
@@ -0,0 +1,18 @@
+config:
+  send_anonymous_usage_stats: False
+  use_colors: True
+meltano:
+  target: "{{ env_var('MELTANO_ENVIRONMENT', 'dev') }}"
+  outputs:
+    dev:
+      type: duckdb
+      path: "{{ env_var('DBT_DUCKDB_PATH') }}"
+      threads: 4
+    staging:
+      type: duckdb
+      path: "{{ env_var('DBT_DUCKDB_PATH') }}"
+      threads: 4
+    prod:
+      type: duckdb
+      path: "{{ env_var('DBT_DUCKDB_PATH') }}"
+      threads: 4
diff --git a/files_dbt_ext/profiles/postgres/profiles.yml b/files_dbt_ext/profiles/postgres/profiles.yml
new file mode 100644
index 0000000..72f69d9
--- /dev/null
+++ b/files_dbt_ext/profiles/postgres/profiles.yml
@@ -0,0 +1,49 @@
+# Postgres config reference: https://docs.getdbt.com/reference/warehouse-profiles/postgres-profile
+config:
+  send_anonymous_usage_stats: False
+  use_colors: True
+meltano:
+  target: "{{ env_var('MELTANO_ENVIRONMENT', 'dev') }}"
+  outputs:
+    dev:
+      type: postgres
+      host: "{{ env_var('DBT_POSTGRES_HOST') }}"
+      user: "{{ env_var('DBT_POSTGRES_USER') }}"
+      password: "{{ env_var('DBT_POSTGRES_PASSWORD') }}"
+      port: "{{ env_var('DBT_POSTGRES_PORT') | int }}"
+      dbname: "{{ env_var('DBT_POSTGRES_DBNAME', '') }}"
+      schema: "{{ env_var('DBT_POSTGRES_SCHEMA') }}"
+      threads: 2
+      keepalives_idle: 0 # default 0, indicating the system default
+      connect_timeout: 10 # default 10 seconds
+      search_path: "{{ env_var('DBT_POSTGRES_SEARCH_PATH', '') }}" # optional, override the default postgres search_path
+      role: "{{ env_var('DBT_POSTGRES_ROLE', '') }}" # optional, set the role dbt assumes when executing queries
+      # sslmode: "{{ env_var('DBT_POSTGRES_SSLMODE', '').split() }}" # optional, set the sslmode used to connect to the database
+    staging:
+      type: postgres
+      host: "{{ env_var('DBT_POSTGRES_HOST') }}"
+      user: "{{ env_var('DBT_POSTGRES_USER') }}"
+      password: "{{ env_var('DBT_POSTGRES_PASSWORD') }}"
+      port: "{{ env_var('DBT_POSTGRES_PORT') | int }}"
+      dbname: "{{ env_var('DBT_POSTGRES_DBNAME', '') }}"
+      schema: "{{ env_var('DBT_POSTGRES_SCHEMA') }}"
+      threads: 4
+      keepalives_idle: 0 # default 0, indicating the system default
+      connect_timeout: 10 # default 10 seconds
+      search_path: "{{ env_var('DBT_POSTGRES_SEARCH_PATH', '') }}" # optional, override the default postgres search_path
+      role: "{{ env_var('DBT_POSTGRES_ROLE', '') }}" # optional, set the role dbt assumes when executing queries
+      # sslmode: "{{ env_var('DBT_POSTGRES_SSLMODE', '').split() }}" # optional, set the sslmode used to connect to the database
+    prod:
+      type: postgres
+      host: "{{ env_var('DBT_POSTGRES_HOST') }}"
+      user: "{{ env_var('DBT_POSTGRES_USER') }}"
+      password: "{{ env_var('DBT_POSTGRES_PASSWORD') }}"
+      port: "{{ env_var('DBT_POSTGRES_PORT') | int }}"
+      dbname: "{{ env_var('DBT_POSTGRES_DBNAME', '') }}"
+      schema: "{{ env_var('DBT_POSTGRES_SCHEMA') }}"
+      threads: 6
+      keepalives_idle: 0 # default 0, indicating the system default
+      connect_timeout: 10 # default 10 seconds
+      search_path: "{{ env_var('DBT_POSTGRES_SEARCH_PATH', '') }}" # optional, override the default postgres search_path
+      role: "{{ env_var('DBT_POSTGRES_ROLE', '') }}" # optional, set the role dbt assumes when executing queries
+      # sslmode: "{{ env_var('DBT_POSTGRES_SSLMODE', '').split() }}" # optional, set the sslmode used to connect to the database
diff --git a/files_dbt_ext/profiles/redshift/profiles.yml b/files_dbt_ext/profiles/redshift/profiles.yml
new file mode 100644
index 0000000..a9be03c
--- /dev/null
+++ b/files_dbt_ext/profiles/redshift/profiles.yml
@@ -0,0 +1,63 @@
+config:
+  send_anonymous_usage_stats: False
+  use_colors: True
+meltano:
+  target: "{{ env_var('MELTANO_ENVIRONMENT', 'dev') }}"
+  outputs:
+    dev:
+      type: redshift
+      method: "{{ env_var('DBT_REDSHIFT_AUTH_METHOD', 'iam')}}"
+      cluster_id: "{{ env_var('DBT_REDSHIFT_CLUSTER_ID') }}"
+      host: "{{ env_var('DBT_REDSHIFT_HOST', '') }}"
+      user: "{{ env_var('DBT_REDSHIFT_USER', '') }}"
+      iam_profile: "{{ env_var('DBT_REDSHIFT_IAM_PROFILE', '') }}"
+      iam_duration_seconds: "{{ env_var('DBT_REDSHIFT_IAM_DURATION_SECONDS', 900) | int }}" # optional
+      autocreate: "{{ env_var('DBT_REDSHIFT_AUTOCREATE', False ) }}" # optional
+      # db_groups: "{{ env_var('DBT_REDSHIFT_DB_GROUPS', '').split() }}" # optional
+      password: "{{ env_var('DBT_REDSHIFT_PASSWORD', '') }}"
+      port: "{{ env_var('DBT_REDSHIFT_PORT', 5439) | int }}"
+      dbname: "{{ env_var('DBT_REDSHIFT_DBNAME') }}"
+      schema: "{{ env_var('DBT_REDSHIFT_SCHEMA') }}"
+      threads: 2
+      keepalives_idle: "{{ env_var('DBT_REDSHIFT_KEEPALIVES_IDLE', 240) | int }}"
+      search_path: "{{ env_var('DBT_REDSHIFT_SEARCH_PATH', 'public')}}" # optional, but not recommended
+      # sslmode: "{{ env_var('DBT_REDSHIFT_SSLMODE', '').split() }}" # optional, set the sslmode used to connect to the database
+      ra3_node: "{{ env_var('DBT_REDSHIFT_RA3_NODE', False ) }}" # optional, enables cross-database sources
+    staging:
+      type: redshift
+      method: "{{ env_var('DBT_REDSHIFT_AUTH_METHOD', 'iam')}}"
+      cluster_id: "{{ env_var('DBT_REDSHIFT_CLUSTER_ID') }}"
+      host: "{{ env_var('DBT_REDSHIFT_HOST', '') }}"
+      user: "{{ env_var('DBT_REDSHIFT_USER', '') }}"
+      iam_profile: "{{ env_var('DBT_REDSHIFT_IAM_PROFILE', '') }}"
+      iam_duration_seconds: "{{ env_var('DBT_REDSHIFT_IAM_DURATION_SECONDS', 900) | int }}" # optional
+      autocreate: "{{ env_var('DBT_REDSHIFT_AUTOCREATE', False ) }}" # optional
+      # db_groups: "{{ env_var('DBT_REDSHIFT_DB_GROUPS', '').split() }}" # optional
+      password: "{{ env_var('DBT_REDSHIFT_PASSWORD', '') }}"
+      port: "{{ env_var('DBT_REDSHIFT_PORT', 5439) | int }}"
+      dbname: "{{ env_var('DBT_REDSHIFT_DBNAME') }}"
+      schema: "{{ env_var('DBT_REDSHIFT_SCHEMA') }}"
+      threads: 2
+      keepalives_idle: "{{ env_var('DBT_REDSHIFT_KEEPALIVES_IDLE', 240) | int }}"
+      search_path: "{{ env_var('DBT_REDSHIFT_SEARCH_PATH', 'public')}}" # optional, but not recommended
+      # sslmode: "{{ env_var('DBT_REDSHIFT_SSLMODE', '').split() }}" # optional, set the sslmode used to connect to the database
+      ra3_node: "{{ env_var('DBT_REDSHIFT_RA3_NODE', False ) }}" # optional, enables cross-database sources
+    prod:
+      type: redshift
+      method: "{{ env_var('DBT_REDSHIFT_AUTH_METHOD', 'iam')}}"
+      cluster_id: "{{ env_var('DBT_REDSHIFT_CLUSTER_ID') }}"
+      host: "{{ env_var('DBT_REDSHIFT_HOST', '') }}"
+      user: "{{ env_var('DBT_REDSHIFT_USER', '') }}"
+      iam_profile: "{{ env_var('DBT_REDSHIFT_IAM_PROFILE', '') }}"
+      iam_duration_seconds: "{{ env_var('DBT_REDSHIFT_IAM_DURATION_SECONDS', 900) | int }}" # optional
+      autocreate: "{{ env_var('DBT_REDSHIFT_AUTOCREATE', False ) }}" # optional
+      # db_groups: "{{ env_var('DBT_REDSHIFT_DB_GROUPS', '').split() }}" # optional
+      password: "{{ env_var('DBT_REDSHIFT_PASSWORD', '') }}"
+      port: "{{ env_var('DBT_REDSHIFT_PORT', 5439) | int }}"
+      dbname: "{{ env_var('DBT_REDSHIFT_DBNAME') }}"
+      schema: "{{ env_var('DBT_REDSHIFT_SCHEMA') }}"
+      threads: 2
+      keepalives_idle: "{{ env_var('DBT_REDSHIFT_KEEPALIVES_IDLE', 240) | int }}"
+      search_path: "{{ env_var('DBT_REDSHIFT_SEARCH_PATH', 'public')}}" # optional, but not recommended
+      # sslmode: "{{ env_var('DBT_REDSHIFT_SSLMODE', '').split() }}" # optional, set the sslmode used to connect to the database
+      ra3_node: "{{ env_var('DBT_REDSHIFT_RA3_NODE', False ) }}" # optional, enables cross-database sources
diff --git a/files_dbt_ext/profiles/snowflake/profiles.yml b/files_dbt_ext/profiles/snowflake/profiles.yml
new file mode 100644
index 0000000..a5e52a1
--- /dev/null
+++ b/files_dbt_ext/profiles/snowflake/profiles.yml
@@ -0,0 +1,41 @@
+# Snowflake config reference: https://docs.getdbt.com/reference/warehouse-profiles/snowflake-profile
+#
+# Warning: Do not store credentials in this file unless you've taken the necessary steps to remove
+# this file from your git commit.
+config:
+  send_anonymous_usage_stats: False
+  use_colors: True
+meltano:
+  target: "{{ env_var('MELTANO_ENVIRONMENT', 'dev') }}" # The default profile name if no other profile is selected.
+  outputs:
+    # TODO: Optionally, revise or add to these profiles to reflect your own environments. Meltano adds dev, staging and prod by default.
+    dev:
+      type: snowflake
+      threads: 2
+      account: "{{ env_var('DBT_SNOWFLAKE_ACCOUNT') }}"
+      user: "{{ env_var('DBT_SNOWFLAKE_USER') }}"
+      password: "{{ env_var('DBT_SNOWFLAKE_PASSWORD') }}"
+      role: "{{ env_var('DBT_SNOWFLAKE_ROLE') }}"
+      warehouse: "{{ env_var('DBT_SNOWFLAKE_WAREHOUSE') }}"
+      database: "{{ env_var('DBT_SNOWFLAKE_DATABASE') }}"
+      schema: "{{ env_var('DBT_SNOWFLAKE_SCHEMA') }}"
+    staging:
+      type: snowflake
+      threads: 4
+      account: "{{ env_var('DBT_SNOWFLAKE_ACCOUNT') }}"
+      user: "{{ env_var('DBT_SNOWFLAKE_USER') }}"
+      password: "{{ env_var('DBT_SNOWFLAKE_PASSWORD') }}"
+      role: "{{ env_var('DBT_SNOWFLAKE_ROLE') }}"
+      warehouse: "{{ env_var('DBT_SNOWFLAKE_WAREHOUSE') }}"
+      database: "{{ env_var('DBT_SNOWFLAKE_DATABASE') }}"
+      schema: "{{ env_var('DBT_SNOWFLAKE_SCHEMA') }}"
+    prod:
+      type: snowflake
+      threads: 6
+      account: "{{ env_var('DBT_SNOWFLAKE_ACCOUNT') }}"
+      user: "{{ env_var('DBT_SNOWFLAKE_USER') }}"
+      password: "{{ env_var('DBT_SNOWFLAKE_PASSWORD') }}"
+      role: "{{ env_var('DBT_SNOWFLAKE_ROLE') }}"
+      warehouse: "{{ env_var('DBT_SNOWFLAKE_WAREHOUSE') }}"
+      database: "{{ env_var('DBT_SNOWFLAKE_DATABASE') }}"
+      schema: "{{ env_var('DBT_SNOWFLAKE_SCHEMA') }}"
diff --git a/poetry.lock b/poetry.lock
index e7a9a4d..efd8c88 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -162,6 +162,21 @@
 docs = ["jaraco.packaging (>=9)", "rst.linker (>=1.9)", "sphinx"]
 perf = ["ipython"]
 testing = ["flufl.flake8", "importlib-resources (>=1.3)", "packaging", "pyfakefs", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)", "pytest-perf (>=0.9.2)"]

+[[package]]
+name = "importlib-resources"
+version = "5.9.0"
+description = "Read resources from Python packages"
+category = "main"
+optional = false
+python-versions = ">=3.7"
+
+[package.dependencies]
+zipp = {version = ">=3.1.0", markers = "python_version < \"3.10\""}
+
+[package.extras]
+docs = ["jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx"]
+testing = ["pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)"]
+
 [[package]]
 name = "iniconfig"
 version = "1.1.1"
@@ -211,7 +226,7 @@ structlog = "^21.2.0"
 type = "git"
 url = "https://github.com/meltano/edk.git"
 reference = "main"
-resolved_reference = "f2bdfe193c57debf0e8b94361f9e30ad0239d396"
+resolved_reference = "5c84bd76bc9095e938b1f47348ffb0c99fe91aed"

 [[package]]
 name = "mypy-extensions"
@@ -493,7 +508,7 @@ testing = ["func-timeout", "jaraco.itertools", "pytest (>=6)", "pytest-black (>=
 [metadata]
 lock-version = "1.1"
 python-versions = "<3.11,>=3.7"
-content-hash = "5f4d0f054f32b9d406c593fb7320c5c70f402be09d2071e888670372035a9cd0"
+content-hash = "ce1e08c613c8d71bcb229eb74961514db9d0b94e6eaebb8ca2575944258f01a1"

 [metadata.files]
 asttokens = [
@@ -569,6 +584,10 @@ importlib-metadata = [
     {file = "importlib_metadata-4.12.0-py3-none-any.whl", hash = "sha256:7401a975809ea1fdc658c3aa4f78cc2195a0e019c5cbc4c06122884e9ae80c23"},
     {file = "importlib_metadata-4.12.0.tar.gz", hash = "sha256:637245b8bab2b6502fcbc752cc4b7a6f6243bb02b31c5c26156ad103d3d45670"},
 ]
+importlib-resources = [
+    {file = "importlib_resources-5.9.0-py3-none-any.whl", hash = "sha256:f78a8df21a79bcc30cfd400bdc38f314333de7c0fb619763f6b9dabab8268bb7"},
+    {file = "importlib_resources-5.9.0.tar.gz", hash = "sha256:5481e97fb45af8dcf2f798952625591c58fe599d0735d86b10f54de086a61681"},
+]
 iniconfig = [
"iniconfig-1.1.1-py2.py3-none-any.whl", hash = "sha256:011e24c64b7f47f6ebd835bb12a743f2fbe9a26d4cecaa7f53bc4f35ee9da8b3"}, {file = "iniconfig-1.1.1.tar.gz", hash = "sha256:bc3af051d7d14b2ee5ef9969666def0cd1a000e121eaea580d4a313df4b37f32"}, @@ -676,6 +695,13 @@ PyYAML = [ {file = "PyYAML-6.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:f84fbc98b019fef2ee9a1cb3ce93e3187a6df0b2538a651bfb890254ba9f90b5"}, {file = "PyYAML-6.0-cp310-cp310-win32.whl", hash = "sha256:2cd5df3de48857ed0544b34e2d40e9fac445930039f3cfe4bcc592a1f836d513"}, {file = "PyYAML-6.0-cp310-cp310-win_amd64.whl", hash = "sha256:daf496c58a8c52083df09b80c860005194014c3698698d1a57cbcfa182142a3a"}, + {file = "PyYAML-6.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d4b0ba9512519522b118090257be113b9468d804b19d63c71dbcf4a48fa32358"}, + {file = "PyYAML-6.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:81957921f441d50af23654aa6c5e5eaf9b06aba7f0a19c18a538dc7ef291c5a1"}, + {file = "PyYAML-6.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:afa17f5bc4d1b10afd4466fd3a44dc0e245382deca5b3c353d8b757f9e3ecb8d"}, + {file = "PyYAML-6.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dbad0e9d368bb989f4515da330b88a057617d16b6a8245084f1b05400f24609f"}, + {file = "PyYAML-6.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:432557aa2c09802be39460360ddffd48156e30721f5e8d917f01d31694216782"}, + {file = "PyYAML-6.0-cp311-cp311-win32.whl", hash = "sha256:bfaef573a63ba8923503d27530362590ff4f576c626d86a9fed95822a8255fd7"}, + {file = "PyYAML-6.0-cp311-cp311-win_amd64.whl", hash = "sha256:01b45c0191e6d66c470b6cf1b9531a771a83c1c4208272ead47a3ae4f2f603bf"}, {file = "PyYAML-6.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:897b80890765f037df3403d22bab41627ca8811ae55e9a722fd0392850ec4d86"}, {file = "PyYAML-6.0-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:50602afada6d6cbfad699b0c7bb50d5ccffa7e46a3d738092afddc1f9758427f"}, {file = "PyYAML-6.0-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:48c346915c114f5fdb3ead70312bd042a953a8ce5c7106d5bfb1a5254e47da92"}, diff --git a/pyproject.toml b/pyproject.toml index 4d4e420..e3c113b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,6 +9,17 @@ homepage = "https://meltano.com" repository = "https://github.com/meltano/dbt-ext" license = "Apache 2.0" +packages = [ + { include = "dbt_ext" }, + { include = "files_dbt_ext" }, +] + +# If you need to static assets with your extension, you can add them here. +# Under a directory path of "files_dbt-ext/" +include = [ + {path = "files_dbt_ext"}, +] + [tool.poetry.dependencies] python = "<3.11,>=3.7" structlog = "^21.2.0" @@ -17,6 +28,7 @@ pydantic = "^1.9.0" click = "^8.1.3" typer = "^0.6.1" "meltano.edk"= {git = "https://github.com/meltano/edk.git", rev="main"} +importlib-resources = "^5.9.0" [tool.poetry.dev-dependencies] black = "^22.3.0" @@ -31,18 +43,6 @@ pytest = "^7.1.3" requires = ["poetry-core>=1.0.8"] build-backend = "poetry.core.masonry.api" -packages = [ - { include = "dbt_ext" } - # uncomment this if you want to include static assets - # { include = "files_dbt-ext.*" } -] - -# If you need to static assets with your extension, you can add them here. 
-# Under a directory path of "files_dbt-ext/"
-#include = [
-#    "files_dbt-ext/somefile.thing"
-#]
-
 [tool.poetry.scripts]
 dbt_extension = 'dbt_ext.main:app'
 dbt_invoker = 'dbt_ext.pass_through:pass_through_cli'
diff --git a/tests/test_dbt_ext.py b/tests/test_dbt_ext.py
index 6430e73..1d1824d 100644
--- a/tests/test_dbt_ext.py
+++ b/tests/test_dbt_ext.py
@@ -1,5 +1,40 @@
-from dbt_ext.main import app
+from unittest.mock import patch


-def test_app_name():
+def test_app_name(monkeypatch):
+    monkeypatch.setenv("DBT_EXT_TYPE", "test")
+    from dbt_ext.main import app
+
     assert app.info.name == "dbt"
+
+
+def test_pre_invoke(monkeypatch):
+    """Verify that pre-invokes are skipped for deps and clean."""
+    monkeypatch.setenv("DBT_EXT_TYPE", "test")
+    from dbt_ext.main import dbt
+
+    dbt_ext = dbt()
+
+    dbt_ext.skip_pre_invoke = True
+    with patch.object(dbt_ext.dbt_invoker, "run_and_log") as mock_run_and_log:
+        dbt_ext.pre_invoke("deps", [])
+        mock_run_and_log.assert_not_called()
+
+        dbt_ext.pre_invoke("clean", [])
+        mock_run_and_log.assert_not_called()
+
+        dbt_ext.pre_invoke("run", [])
+        mock_run_and_log.assert_not_called()
+
+    dbt_ext.skip_pre_invoke = False
+    with patch.object(dbt_ext.dbt_invoker, "run_and_log") as mock_run_and_log:
+        dbt_ext.pre_invoke("deps", [])
+        mock_run_and_log.assert_not_called()
+
+        dbt_ext.pre_invoke("clean", [])
+        mock_run_and_log.assert_not_called()
+
+        dbt_ext.pre_invoke("run", [])
+        assert mock_run_and_log.call_count == 2
+        assert mock_run_and_log.call_args_list[0][0][0] == "clean"
+        assert mock_run_and_log.call_args_list[1][0][0] == "deps"