Skip to content
This repository was archived by the owner on May 17, 2024. It is now read-only.

Add env variables support in .toml config #356

Merged
merged 1 commit into from
Jan 24, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions data_diff/config.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import re
import os
from typing import Any, Dict
import toml

Expand All @@ -11,6 +13,8 @@ def is_uri(s: str) -> bool:


def _apply_config(config: Dict[str, Any], run_name: str, kw: Dict[str, Any]):
_resolve_env(config)

# Load config
databases = config.pop("database", {})
runs = config.pop("run", {})
Expand Down Expand Up @@ -77,6 +81,32 @@ def _apply_config(config: Dict[str, Any], run_name: str, kw: Dict[str, Any]):
return new_kw


# Regular expression matching an environment variable reference written as ${NAME};
# capture group 1 holds NAME (without the "${" and "}" delimiters).
# There are no strict requirements for the environment variable name format,
# but most shells only allow alphanumeric characters and underscores.
# https://pubs.opengroup.org/onlinepubs/000095399/basedefs/xbd_chap08.html
# "Environment variable names (...) consist solely of uppercase letters, digits, and the '_' (underscore)"
# Note: the pattern below also accepts lowercase letters, i.e. it is more
# permissive than the wording quoted above.
_ENV_VAR_PATTERN = r"\$\{([A-Za-z0-9_]+)\}"


def _resolve_env(config: Dict[str, Any]):
    """
    Resolve environment variables referenced as ${ENV_VAR_NAME}.

    Walks ``config`` recursively and rewrites it in place: every string value,
    whether stored directly under a key, inside a nested table (dict) or inside
    an array (list), has each ``${NAME}`` reference substituted.
    Missing environment variables are replaced with an empty string.
    Non-string scalars (numbers, booleans, ...) are left untouched.
    """
    for key, value in config.items():
        # Only existing keys are rebound, so the dict size never changes
        # while it is being iterated.
        config[key] = _resolve_env_value(value)


def _resolve_env_value(value: Any) -> Any:
    """Return ``value`` with env references expanded; dicts and lists are updated in place."""
    if isinstance(value, str):
        return re.sub(_ENV_VAR_PATTERN, _replace_match, value)
    if isinstance(value, dict):
        _resolve_env(value)
    elif isinstance(value, list):
        # Slice assignment keeps the same list object, so any other reference
        # to this array sees the resolved items as well.
        value[:] = [_resolve_env_value(item) for item in value]
    return value


def _replace_match(match: re.Match) -> str:
    """Substitution callback: map a ``${NAME}`` match to the value of NAME in the environment.

    Capture group 1 of ``match`` is taken as the variable name (group 0 would be
    the whole matched text). An unset variable yields an empty string.
    """
    var_name = match[1]
    return os.getenv(var_name, "")


def apply_config_from_file(path: str, run_name: str, kw: Dict[str, Any]):
    """Load the TOML config file at ``path`` and apply it through ``_apply_config``.

    ``run_name`` and ``kw`` are forwarded unchanged to ``_apply_config``, and its
    result is returned. Errors from opening or parsing the file propagate to the
    caller.
    """
    # TOML documents are UTF-8 by specification; decode explicitly instead of
    # relying on the platform's locale-dependent default encoding.
    with open(path, encoding="utf-8") as f:
        return _apply_config(toml.load(f), run_name, kw)
Expand Down
44 changes: 44 additions & 0 deletions tests/test_config.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import os
import unittest

from data_diff.config import apply_config_from_string, ConfigParseError
Expand Down Expand Up @@ -59,3 +60,46 @@ def test_remove_password(self):
expected = url.replace("PASS", replace_with)
removed = remove_password_from_url(url, replace_with)
self.assertEqual(removed, expected)

def test_embed_env(self):
    """``${VAR}`` references in a TOML config are replaced with environment values.

    Variables that are not set (``UPDATE_COLUMN`` here) resolve to an empty string.
    """
    # Local import so the block does not depend on a new file-level import.
    from unittest import mock

    env = {
        "DRIVER": "postgresql",
        "USER": "postgres",
        "PASSWORD": "Password1",
        "RUN_PG_1_DATABASE": "test_postgresql",
        "RUN_PG_1_TABLE": "rating",
        "RUN_PG_2_DATABASE": "postgresql://postgres:Password1@/",
        "RUN_PG_2_TABLE": "rating_del1",
    }
    config = r"""
[database.test_postgresql]
driver = "${DRIVER}"
user = "${USER}"
password = "${PASSWORD}"

[run.default]
update_column = "${UPDATE_COLUMN}"
verbose = true
threads = 2

[run.pg_pg]
threads = 4
1.database = "${RUN_PG_1_DATABASE}"
1.table = "${RUN_PG_1_TABLE}"
1.threads = 11
2.database = "${RUN_PG_2_DATABASE}"
2.table = "${RUN_PG_2_TABLE}"
2.threads = 22
"""

    # patch.dict restores os.environ when the block exits, so overriding
    # variables such as USER cannot leak into other tests.
    with mock.patch.dict(os.environ, env):
        # The "missing variable" assertion below needs UPDATE_COLUMN to be
        # absent, whatever the caller's environment contains.
        os.environ.pop("UPDATE_COLUMN", None)
        res = apply_config_from_string(config, "pg_pg", {})

    self.assertEqual(res["update_column"], "")  # missing env var
    self.assertIs(res["verbose"], True)
    self.assertEqual(res["threads"], 4)  # overwritten by pg_pg
    self.assertEqual(
        res["database1"],
        {"driver": "postgresql", "user": "postgres", "password": "Password1"},
    )
    self.assertEqual(res["database2"], "postgresql://postgres:Password1@/")
    self.assertEqual(res["table1"], "rating")
    self.assertEqual(res["table2"], "rating_del1")
    self.assertEqual(res["threads1"], 11)
    self.assertEqual(res["threads2"], 22)