Skip to content
This repository was archived by the owner on May 17, 2024. It is now read-only.

Commit 4ce9f39: squash dbt integration change

1 parent: c2eaa56

File tree

7 files changed: 2281 additions and 134 deletions.

data_diff/__main__.py

Lines changed: 115 additions & 35 deletions
Original file line number | Diff line number | Diff line change
@@ -13,9 +13,14 @@
1313
from sqeleton.schema import create_schema
1414
from sqeleton.queries.api import current_timestamp
1515

16+
from .dbt import DbtDiffer
1617
from .utils import eval_name_template, remove_password_from_url, safezip, match_like
1718
from .diff_tables import Algorithm
18-
from .hashdiff_tables import HashDiffer, DEFAULT_BISECTION_THRESHOLD, DEFAULT_BISECTION_FACTOR
19+
from .hashdiff_tables import (
20+
HashDiffer,
21+
DEFAULT_BISECTION_THRESHOLD,
22+
DEFAULT_BISECTION_FACTOR,
23+
)
1924
from .joindiff_tables import TABLE_WRITE_LIMIT, JoinDiffer
2025
from .table_segment import TableSegment
2126
from .databases import connect
@@ -84,9 +89,13 @@ def __init__(self, **kwargs):
8489
def write_usage(self, prog: str, args: str = "", prefix: Optional[str] = None) -> None:
8590
self.write(f"data-diff v{__version__} - efficiently diff rows across database tables.\n\n")
8691
self.write("Usage:\n")
87-
self.write(f" * In-db diff: {prog} <database_a> <table_a> <table_b> [OPTIONS]\n")
88-
self.write(f" * Cross-db diff: {prog} <database_a> <table_a> <database_b> <table_b> [OPTIONS]\n")
89-
self.write(f" * Using config: {prog} --conf PATH [--run NAME] [OPTIONS]\n")
92+
self.write(
93+
f" * In-db diff: data-diff --database1 <database_a> --table1 <table_a> --table2 <table_b> [OPTIONS]\n"
94+
)
95+
self.write(
96+
f" * Cross-db diff: data-diff --database1 <database_a> --table1 <table_a> --database2 <database_b> --table2 <table_b> [OPTIONS]\n"
97+
)
98+
self.write(f" * Using config: data-diff --conf PATH [--run NAME] [OPTIONS]\n")
9099

91100

92101
click.Context.formatter_class = MyHelpFormatter
@@ -98,9 +107,20 @@ def write_usage(self, prog: str, args: str = "", prefix: Optional[str] = None) -
98107
@click.argument("database2", required=False)
99108
@click.argument("table2", required=False)
100109
@click.option(
101-
"-k", "--key-columns", default=[], multiple=True, help="Names of primary key columns. Default='id'.", metavar="NAME"
110+
"-k",
111+
"--key-columns",
112+
default=[],
113+
multiple=True,
114+
help="Names of primary key columns. Default='id'.",
115+
metavar="NAME",
116+
)
117+
@click.option(
118+
"-t",
119+
"--update-column",
120+
default=None,
121+
help="Name of updated_at/last_updated column",
122+
metavar="NAME",
102123
)
103-
@click.option("-t", "--update-column", default=None, help="Name of updated_at/last_updated column", metavar="NAME")
104124
@click.option(
105125
"-c",
106126
"--columns",
@@ -111,7 +131,13 @@ def write_usage(self, prog: str, args: str = "", prefix: Optional[str] = None) -
111131
"Accepts a name or a pattern like in SQL. Example: -c col% -c another_col",
112132
metavar="NAME",
113133
)
114-
@click.option("-l", "--limit", default=None, help="Maximum number of differences to find", metavar="NUM")
134+
@click.option(
135+
"-l",
136+
"--limit",
137+
default=None,
138+
help="Maximum number of differences to find",
139+
metavar="NUM",
140+
)
115141
@click.option(
116142
"--bisection-factor",
117143
default=None,
@@ -140,15 +166,27 @@ def write_usage(self, prog: str, args: str = "", prefix: Optional[str] = None) -
140166
metavar="AGE",
141167
)
142168
@click.option(
143-
"--max-age", default=None, help="Considers only rows younger than specified. See --min-age.", metavar="AGE"
169+
"--max-age",
170+
default=None,
171+
help="Considers only rows younger than specified. See --min-age.",
172+
metavar="AGE",
144173
)
145174
@click.option("-s", "--stats", is_flag=True, help="Print stats instead of a detailed diff")
146175
@click.option("-d", "--debug", is_flag=True, help="Print debug info")
147-
@click.option("--json", "json_output", is_flag=True, help="Print JSONL output for machine readability")
176+
@click.option(
177+
"--json",
178+
"json_output",
179+
is_flag=True,
180+
help="Print JSONL output for machine readability",
181+
)
148182
@click.option("-v", "--verbose", is_flag=True, help="Print extra info")
149183
@click.option("--version", is_flag=True, help="Print version info and exit")
150184
@click.option("-i", "--interactive", is_flag=True, help="Confirm queries, implies --debug")
151-
@click.option("--no-tracking", is_flag=True, help="data-diff sends home anonymous usage data. Use this to disable it.")
185+
@click.option(
186+
"--no-tracking",
187+
is_flag=True,
188+
help="data-diff sends home anonymous usage data. Use this to disable it.",
189+
)
152190
@click.option(
153191
"--case-sensitive",
154192
is_flag=True,
@@ -185,9 +223,18 @@ def write_usage(self, prog: str, args: str = "", prefix: Optional[str] = None) -
185223
metavar="COUNT",
186224
)
187225
@click.option(
188-
"-w", "--where", default=None, help="An additional 'where' expression to restrict the search space. Beware of SQL Injection!", metavar="EXPR"
226+
"-w",
227+
"--where",
228+
default=None,
229+
help="An additional 'where' expression to restrict the search space. Beware of SQL Injection!",
230+
metavar="EXPR",
231+
)
232+
@click.option(
233+
"-a",
234+
"--algorithm",
235+
default=Algorithm.AUTO.value,
236+
type=click.Choice([i.value for i in Algorithm]),
189237
)
190-
@click.option("-a", "--algorithm", default=Algorithm.AUTO.value, type=click.Choice([i.value for i in Algorithm]))
191238
@click.option(
192239
"--conf",
193240
default=None,
@@ -200,24 +247,74 @@ def write_usage(self, prog: str, args: str = "", prefix: Optional[str] = None) -
200247
help="Name of run-configuration to run. If used, CLI arguments for database and table must be omitted.",
201248
metavar="NAME",
202249
)
250+
@click.option(
251+
"--dbt",
252+
is_flag=True,
253+
help="Run dbt diff.",
254+
)
255+
@click.option(
256+
"--dbt-cloud",
257+
is_flag=True,
258+
help="Run dbt diff on Datafold cloud. Expects an api key on env var DATAFOLD_API_KEY.",
259+
)
260+
@click.option(
261+
"--dbt-profiles-dir",
262+
default=None,
263+
metavar="PATH",
264+
help="Override the default dbt profile location (~/.dbt).",
265+
)
266+
@click.option(
267+
"--dbt-project-dir",
268+
default=None,
269+
metavar="PATH",
270+
help="Override the dbt project directory. Otherwise assumed to be the current directory.",
271+
)
203272
def main(conf, run, **kw):
204273
if kw["table2"] is None and kw["database2"]:
205274
# Use the "database table table" form
206275
kw["table2"] = kw["database2"]
207276
kw["database2"] = kw["database1"]
208277

278+
if kw["version"]:
279+
print(f"v{__version__}")
280+
return
281+
209282
if conf:
210283
kw = apply_config_from_file(conf, run, kw)
211284

285+
if kw["no_tracking"]:
286+
disable_tracking()
287+
288+
if kw.get("interactive"):
289+
kw["debug"] = True
290+
291+
if kw["debug"]:
292+
logging.basicConfig(level=logging.DEBUG, format=LOG_FORMAT, datefmt=DATE_FORMAT)
293+
if kw.get("__conf__"):
294+
kw["__conf__"] = deepcopy(kw["__conf__"])
295+
_remove_passwords_in_dict(kw["__conf__"])
296+
logging.debug(f"Applied run configuration: {kw['__conf__']}")
297+
elif kw.get("verbose"):
298+
logging.basicConfig(level=logging.INFO, format=LOG_FORMAT, datefmt=DATE_FORMAT)
299+
else:
300+
logging.basicConfig(level=logging.WARNING, format=LOG_FORMAT, datefmt=DATE_FORMAT)
301+
212302
try:
213-
return _main(**kw)
303+
if kw["dbt"] or kw["dbt_cloud"]:
304+
DbtDiffer.diff(
305+
profiles_dir_override=kw["dbt_profiles_dir"],
306+
project_dir_override=kw["dbt_project_dir"],
307+
is_cloud=kw["dbt_cloud"],
308+
)
309+
else:
310+
return _data_diff(**kw)
214311
except Exception as e:
215312
logging.error(e)
216313
if kw["debug"]:
217314
raise
218315

219316

220-
def _main(
317+
def _data_diff(
221318
database1,
222319
table1,
223320
database2,
@@ -246,31 +343,14 @@ def _main(
246343
materialize_all_rows,
247344
table_write_limit,
248345
materialize_to_table,
346+
dbt,
347+
dbt_cloud,
348+
dbt_profiles_dir,
349+
dbt_project_dir,
249350
threads1=None,
250351
threads2=None,
251352
__conf__=None,
252353
):
253-
if version:
254-
print(f"v{__version__}")
255-
return
256-
257-
if no_tracking:
258-
disable_tracking()
259-
260-
if interactive:
261-
debug = True
262-
263-
if debug:
264-
logging.basicConfig(level=logging.DEBUG, format=LOG_FORMAT, datefmt=DATE_FORMAT)
265-
if __conf__:
266-
__conf__ = deepcopy(__conf__)
267-
_remove_passwords_in_dict(__conf__)
268-
logging.debug(f"Applied run configuration: {__conf__}")
269-
elif verbose:
270-
logging.basicConfig(level=logging.INFO, format=LOG_FORMAT, datefmt=DATE_FORMAT)
271-
else:
272-
logging.basicConfig(level=logging.WARNING, format=LOG_FORMAT, datefmt=DATE_FORMAT)
273-
274354
if limit and stats:
275355
logging.error("Cannot specify a limit when using the -s/--stats switch")
276356
return

Comments (0): no commit comments.