Skip to content
This repository was archived by the owner on May 17, 2024. It is now read-only.

Commit 2d624d6

Browse files
authored
Merge pull request #356 from datafold/278-support-env-variables
Add env variables support in .toml config
2 parents c2eaa56 + c4f2f51 commit 2d624d6

File tree

2 files changed

+74
-0
lines changed

2 files changed

+74
-0
lines changed

data_diff/config.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
import re
2+
import os
13
from typing import Any, Dict
24
import toml
35

@@ -11,6 +13,8 @@ def is_uri(s: str) -> bool:
1113

1214

1315
def _apply_config(config: Dict[str, Any], run_name: str, kw: Dict[str, Any]):
16+
_resolve_env(config)
17+
1418
# Load config
1519
databases = config.pop("database", {})
1620
runs = config.pop("run", {})
@@ -77,6 +81,32 @@ def _apply_config(config: Dict[str, Any], run_name: str, kw: Dict[str, Any]):
7781
return new_kw
7882

7983

84+
# There are no strict requirements for the environment variable name format.
85+
# But most shells only allow alphanumeric characters and underscores.
86+
# https://pubs.opengroup.org/onlinepubs/000095399/basedefs/xbd_chap08.html
87+
# "Environment variable names (...) consist solely of uppercase letters, digits, and the '_' (underscore)"
88+
_ENV_VAR_PATTERN = r"\$\{([A-Za-z0-9_]+)\}"
89+
90+
91+
def _resolve_env(config: Dict[str, Any]):
92+
"""
93+
Resolve environment variables referenced as ${ENV_VAR_NAME}.
94+
Missing environment variables are replaced with an empty string.
95+
"""
96+
for key, value in config.items():
97+
if isinstance(value, dict):
98+
_resolve_env(value)
99+
elif isinstance(value, str):
100+
config[key] = re.sub(_ENV_VAR_PATTERN, _replace_match, value)
101+
102+
103+
def _replace_match(match: re.Match) -> str:
104+
# Lookup referenced variable in environment.
105+
# Replace with empty string if not found
106+
referenced_var = match.group(1) # group(0) is the whole string
107+
return os.environ.get(referenced_var, "")
108+
109+
80110
def apply_config_from_file(path: str, run_name: str, kw: Dict[str, Any]):
    """Load the TOML config file at `path` and apply it to `kw`.

    The parsed document is passed to `_apply_config` together with `run_name`
    and `kw`; whatever `_apply_config` returns is returned unchanged.
    """
    # TOML documents are UTF-8 by specification, so don't depend on the
    # platform's locale encoding when reading the file.
    with open(path, encoding="utf-8") as f:
        return _apply_config(toml.load(f), run_name, kw)

tests/test_config.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import os
12
import unittest
23

34
from data_diff.config import apply_config_from_string, ConfigParseError
@@ -59,3 +60,46 @@ def test_remove_password(self):
5960
expected = url.replace("PASS", replace_with)
6061
removed = remove_password_from_url(url, replace_with)
6162
self.assertEqual(removed, expected)
63+
64+
def test_embed_env(self):
    """${VAR} references in a TOML config are replaced by environment values."""
    # Local import so the test does not depend on file-level imports.
    from unittest import mock

    env = {
        "DRIVER": "postgresql",
        "USER": "postgres",
        "PASSWORD": "Password1",
        "RUN_PG_1_DATABASE": "test_postgresql",
        "RUN_PG_1_TABLE": "rating",
        "RUN_PG_2_DATABASE": "postgresql://postgres:Password1@/",
        "RUN_PG_2_TABLE": "rating_del1",
    }
    config = r"""
        [database.test_postgresql]
        driver = "${DRIVER}"
        user = "${USER}"
        password = "${PASSWORD}"

        [run.default]
        update_column = "${UPDATE_COLUMN}"
        verbose = true
        threads = 2

        [run.pg_pg]
        threads = 4
        1.database = "${RUN_PG_1_DATABASE}"
        1.table = "${RUN_PG_1_TABLE}"
        1.threads = 11
        2.database = "${RUN_PG_2_DATABASE}"
        2.table = "${RUN_PG_2_TABLE}"
        2.threads = 22
        """

    # patch.dict restores os.environ on exit, so overriding common variables
    # such as USER cannot leak into other tests or into the process.
    with mock.patch.dict(os.environ, env):
        # The "missing variable" case must not depend on the ambient environment.
        os.environ.pop("UPDATE_COLUMN", None)
        res = apply_config_from_string(config, "pg_pg", {})

    self.assertEqual(res["update_column"], "")  # missing env var
    self.assertIs(res["verbose"], True)
    self.assertEqual(res["threads"], 4)  # overwritten by pg_pg
    self.assertEqual(
        res["database1"],
        {"driver": "postgresql", "user": "postgres", "password": "Password1"},
    )
    self.assertEqual(res["database2"], "postgresql://postgres:Password1@/")
    self.assertEqual(res["table1"], "rating")
    self.assertEqual(res["table2"], "rating_del1")
    self.assertEqual(res["threads1"], 11)
    self.assertEqual(res["threads2"], 22)

0 commit comments

Comments
 (0)