Skip to content
This repository was archived by the owner on May 17, 2024. It is now read-only.

Commit 27e0a87

Browse files
committed
squash add dbt integration
1 parent c2eaa56 commit 27e0a87

File tree

6 files changed

+2266
-129
lines changed

6 files changed

+2266
-129
lines changed

data_diff/__main__.py

Lines changed: 108 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -13,9 +13,14 @@
1313
from sqeleton.schema import create_schema
1414
from sqeleton.queries.api import current_timestamp
1515

16+
from .dbt import DbtDiffer
1617
from .utils import eval_name_template, remove_password_from_url, safezip, match_like
1718
from .diff_tables import Algorithm
18-
from .hashdiff_tables import HashDiffer, DEFAULT_BISECTION_THRESHOLD, DEFAULT_BISECTION_FACTOR
19+
from .hashdiff_tables import (
20+
HashDiffer,
21+
DEFAULT_BISECTION_THRESHOLD,
22+
DEFAULT_BISECTION_FACTOR,
23+
)
1924
from .joindiff_tables import TABLE_WRITE_LIMIT, JoinDiffer
2025
from .table_segment import TableSegment
2126
from .databases import connect
@@ -98,9 +103,20 @@ def write_usage(self, prog: str, args: str = "", prefix: Optional[str] = None) -
98103
@click.argument("database2", required=False)
99104
@click.argument("table2", required=False)
100105
@click.option(
101-
"-k", "--key-columns", default=[], multiple=True, help="Names of primary key columns. Default='id'.", metavar="NAME"
106+
"-k",
107+
"--key-columns",
108+
default=[],
109+
multiple=True,
110+
help="Names of primary key columns. Default='id'.",
111+
metavar="NAME",
112+
)
113+
@click.option(
114+
"-t",
115+
"--update-column",
116+
default=None,
117+
help="Name of updated_at/last_updated column",
118+
metavar="NAME",
102119
)
103-
@click.option("-t", "--update-column", default=None, help="Name of updated_at/last_updated column", metavar="NAME")
104120
@click.option(
105121
"-c",
106122
"--columns",
@@ -111,7 +127,13 @@ def write_usage(self, prog: str, args: str = "", prefix: Optional[str] = None) -
111127
"Accepts a name or a pattern like in SQL. Example: -c col% -c another_col",
112128
metavar="NAME",
113129
)
114-
@click.option("-l", "--limit", default=None, help="Maximum number of differences to find", metavar="NUM")
130+
@click.option(
131+
"-l",
132+
"--limit",
133+
default=None,
134+
help="Maximum number of differences to find",
135+
metavar="NUM",
136+
)
115137
@click.option(
116138
"--bisection-factor",
117139
default=None,
@@ -140,15 +162,27 @@ def write_usage(self, prog: str, args: str = "", prefix: Optional[str] = None) -
140162
metavar="AGE",
141163
)
142164
@click.option(
143-
"--max-age", default=None, help="Considers only rows younger than specified. See --min-age.", metavar="AGE"
165+
"--max-age",
166+
default=None,
167+
help="Considers only rows younger than specified. See --min-age.",
168+
metavar="AGE",
144169
)
145170
@click.option("-s", "--stats", is_flag=True, help="Print stats instead of a detailed diff")
146171
@click.option("-d", "--debug", is_flag=True, help="Print debug info")
147-
@click.option("--json", "json_output", is_flag=True, help="Print JSONL output for machine readability")
172+
@click.option(
173+
"--json",
174+
"json_output",
175+
is_flag=True,
176+
help="Print JSONL output for machine readability",
177+
)
148178
@click.option("-v", "--verbose", is_flag=True, help="Print extra info")
149179
@click.option("--version", is_flag=True, help="Print version info and exit")
150180
@click.option("-i", "--interactive", is_flag=True, help="Confirm queries, implies --debug")
151-
@click.option("--no-tracking", is_flag=True, help="data-diff sends home anonymous usage data. Use this to disable it.")
181+
@click.option(
182+
"--no-tracking",
183+
is_flag=True,
184+
help="data-diff sends home anonymous usage data. Use this to disable it.",
185+
)
152186
@click.option(
153187
"--case-sensitive",
154188
is_flag=True,
@@ -185,9 +219,18 @@ def write_usage(self, prog: str, args: str = "", prefix: Optional[str] = None) -
185219
metavar="COUNT",
186220
)
187221
@click.option(
188-
"-w", "--where", default=None, help="An additional 'where' expression to restrict the search space. Beware of SQL Injection!", metavar="EXPR"
222+
"-w",
223+
"--where",
224+
default=None,
225+
help="An additional 'where' expression to restrict the search space. Beware of SQL Injection!",
226+
metavar="EXPR",
227+
)
228+
@click.option(
229+
"-a",
230+
"--algorithm",
231+
default=Algorithm.AUTO.value,
232+
type=click.Choice([i.value for i in Algorithm]),
189233
)
190-
@click.option("-a", "--algorithm", default=Algorithm.AUTO.value, type=click.Choice([i.value for i in Algorithm]))
191234
@click.option(
192235
"--conf",
193236
default=None,
@@ -200,24 +243,74 @@ def write_usage(self, prog: str, args: str = "", prefix: Optional[str] = None) -
200243
help="Name of run-configuration to run. If used, CLI arguments for database and table must be omitted.",
201244
metavar="NAME",
202245
)
246+
@click.option(
247+
"--dbt",
248+
is_flag=True,
249+
help="Run dbt diff.",
250+
)
251+
@click.option(
252+
"--dbt-cloud",
253+
is_flag=True,
254+
help="Run dbt diff on Datafold cloud. Expects an api key on env var DATAFOLD_API_KEY.",
255+
)
256+
@click.option(
257+
"--dbt-profiles-dir",
258+
default=None,
259+
metavar="PATH",
260+
help="Override the default dbt profile location (~/.dbt).",
261+
)
262+
@click.option(
263+
"--dbt-project-dir",
264+
default=None,
265+
metavar="PATH",
266+
help="Override the dbt project directory. Otherwise assumed to be the current directory.",
267+
)
203268
def main(conf, run, **kw):
204269
if kw["table2"] is None and kw["database2"]:
205270
# Use the "database table table" form
206271
kw["table2"] = kw["database2"]
207272
kw["database2"] = kw["database1"]
208273

274+
if kw["version"]:
275+
print(f"v{__version__}")
276+
return
277+
209278
if conf:
210279
kw = apply_config_from_file(conf, run, kw)
211280

281+
if kw["no_tracking"]:
282+
disable_tracking()
283+
284+
if kw.get("interactive"):
285+
kw["debug"] = True
286+
287+
if kw["debug"]:
288+
logging.basicConfig(level=logging.DEBUG, format=LOG_FORMAT, datefmt=DATE_FORMAT)
289+
if kw.get("__conf__"):
290+
kw["__conf__"] = deepcopy(kw["__conf__"])
291+
_remove_passwords_in_dict(kw["__conf__"])
292+
logging.debug(f"Applied run configuration: {kw['__conf__']}")
293+
elif kw.get("verbose"):
294+
logging.basicConfig(level=logging.INFO, format=LOG_FORMAT, datefmt=DATE_FORMAT)
295+
else:
296+
logging.basicConfig(level=logging.WARNING, format=LOG_FORMAT, datefmt=DATE_FORMAT)
297+
212298
try:
213-
return _main(**kw)
299+
if kw["dbt"] or kw["dbt_cloud"]:
300+
DbtDiffer.diff(
301+
profiles_dir_override=kw["dbt_profiles_dir"],
302+
project_dir_override=kw["dbt_project_dir"],
303+
is_cloud=kw["dbt_cloud"],
304+
)
305+
else:
306+
return _data_diff(**kw)
214307
except Exception as e:
215308
logging.error(e)
216309
if kw["debug"]:
217310
raise
218311

219312

220-
def _main(
313+
def _data_diff(
221314
database1,
222315
table1,
223316
database2,
@@ -246,31 +339,14 @@ def _main(
246339
materialize_all_rows,
247340
table_write_limit,
248341
materialize_to_table,
342+
dbt,
343+
dbt_cloud,
344+
dbt_profiles_dir,
345+
dbt_project_dir,
249346
threads1=None,
250347
threads2=None,
251348
__conf__=None,
252349
):
253-
if version:
254-
print(f"v{__version__}")
255-
return
256-
257-
if no_tracking:
258-
disable_tracking()
259-
260-
if interactive:
261-
debug = True
262-
263-
if debug:
264-
logging.basicConfig(level=logging.DEBUG, format=LOG_FORMAT, datefmt=DATE_FORMAT)
265-
if __conf__:
266-
__conf__ = deepcopy(__conf__)
267-
_remove_passwords_in_dict(__conf__)
268-
logging.debug(f"Applied run configuration: {__conf__}")
269-
elif verbose:
270-
logging.basicConfig(level=logging.INFO, format=LOG_FORMAT, datefmt=DATE_FORMAT)
271-
else:
272-
logging.basicConfig(level=logging.WARNING, format=LOG_FORMAT, datefmt=DATE_FORMAT)
273-
274350
if limit and stats:
275351
logging.error("Cannot specify a limit when using the -s/--stats switch")
276352
return

0 commit comments

Comments
 (0)