13
13
from sqeleton .schema import create_schema
14
14
from sqeleton .queries .api import current_timestamp
15
15
16
+ from .dbt import DbtDiffer
16
17
from .utils import eval_name_template , remove_password_from_url , safezip , match_like
17
18
from .diff_tables import Algorithm
18
- from .hashdiff_tables import HashDiffer , DEFAULT_BISECTION_THRESHOLD , DEFAULT_BISECTION_FACTOR
19
+ from .hashdiff_tables import (
20
+ HashDiffer ,
21
+ DEFAULT_BISECTION_THRESHOLD ,
22
+ DEFAULT_BISECTION_FACTOR ,
23
+ )
19
24
from .joindiff_tables import TABLE_WRITE_LIMIT , JoinDiffer
20
25
from .table_segment import TableSegment
21
26
from .databases import connect
@@ -84,9 +89,13 @@ def __init__(self, **kwargs):
84
89
def write_usage (self , prog : str , args : str = "" , prefix : Optional [str ] = None ) -> None :
85
90
self .write (f"data-diff v{ __version__ } - efficiently diff rows across database tables.\n \n " )
86
91
self .write ("Usage:\n " )
87
- self .write (f" * In-db diff: { prog } <database_a> <table_a> <table_b> [OPTIONS]\n " )
88
- self .write (f" * Cross-db diff: { prog } <database_a> <table_a> <database_b> <table_b> [OPTIONS]\n " )
89
- self .write (f" * Using config: { prog } --conf PATH [--run NAME] [OPTIONS]\n " )
92
+ self .write (
93
+ f" * In-db diff: data-diff --database1 <database_a> --table1 <table_a> --table2 <table_b> [OPTIONS]\n "
94
+ )
95
+ self .write (
96
+ f" * Cross-db diff: data-diff --database1 <database_a> --table1 <table_a> --database2 <database_b> --table2 <table_b> [OPTIONS]\n "
97
+ )
98
+ self .write (f" * Using config: data-diff --conf PATH [--run NAME] [OPTIONS]\n " )
90
99
91
100
92
101
click .Context .formatter_class = MyHelpFormatter
@@ -98,9 +107,20 @@ def write_usage(self, prog: str, args: str = "", prefix: Optional[str] = None) -
98
107
@click .argument ("database2" , required = False )
99
108
@click .argument ("table2" , required = False )
100
109
@click .option (
101
- "-k" , "--key-columns" , default = [], multiple = True , help = "Names of primary key columns. Default='id'." , metavar = "NAME"
110
+ "-k" ,
111
+ "--key-columns" ,
112
+ default = [],
113
+ multiple = True ,
114
+ help = "Names of primary key columns. Default='id'." ,
115
+ metavar = "NAME" ,
116
+ )
117
+ @click .option (
118
+ "-t" ,
119
+ "--update-column" ,
120
+ default = None ,
121
+ help = "Name of updated_at/last_updated column" ,
122
+ metavar = "NAME" ,
102
123
)
103
- @click .option ("-t" , "--update-column" , default = None , help = "Name of updated_at/last_updated column" , metavar = "NAME" )
104
124
@click .option (
105
125
"-c" ,
106
126
"--columns" ,
@@ -111,7 +131,13 @@ def write_usage(self, prog: str, args: str = "", prefix: Optional[str] = None) -
111
131
"Accepts a name or a pattern like in SQL. Example: -c col% -c another_col" ,
112
132
metavar = "NAME" ,
113
133
)
114
- @click .option ("-l" , "--limit" , default = None , help = "Maximum number of differences to find" , metavar = "NUM" )
134
+ @click .option (
135
+ "-l" ,
136
+ "--limit" ,
137
+ default = None ,
138
+ help = "Maximum number of differences to find" ,
139
+ metavar = "NUM" ,
140
+ )
115
141
@click .option (
116
142
"--bisection-factor" ,
117
143
default = None ,
@@ -140,15 +166,27 @@ def write_usage(self, prog: str, args: str = "", prefix: Optional[str] = None) -
140
166
metavar = "AGE" ,
141
167
)
142
168
@click .option (
143
- "--max-age" , default = None , help = "Considers only rows younger than specified. See --min-age." , metavar = "AGE"
169
+ "--max-age" ,
170
+ default = None ,
171
+ help = "Considers only rows younger than specified. See --min-age." ,
172
+ metavar = "AGE" ,
144
173
)
145
174
@click .option ("-s" , "--stats" , is_flag = True , help = "Print stats instead of a detailed diff" )
146
175
@click .option ("-d" , "--debug" , is_flag = True , help = "Print debug info" )
147
- @click .option ("--json" , "json_output" , is_flag = True , help = "Print JSONL output for machine readability" )
176
+ @click .option (
177
+ "--json" ,
178
+ "json_output" ,
179
+ is_flag = True ,
180
+ help = "Print JSONL output for machine readability" ,
181
+ )
148
182
@click .option ("-v" , "--verbose" , is_flag = True , help = "Print extra info" )
149
183
@click .option ("--version" , is_flag = True , help = "Print version info and exit" )
150
184
@click .option ("-i" , "--interactive" , is_flag = True , help = "Confirm queries, implies --debug" )
151
- @click .option ("--no-tracking" , is_flag = True , help = "data-diff sends home anonymous usage data. Use this to disable it." )
185
+ @click .option (
186
+ "--no-tracking" ,
187
+ is_flag = True ,
188
+ help = "data-diff sends home anonymous usage data. Use this to disable it." ,
189
+ )
152
190
@click .option (
153
191
"--case-sensitive" ,
154
192
is_flag = True ,
@@ -185,9 +223,18 @@ def write_usage(self, prog: str, args: str = "", prefix: Optional[str] = None) -
185
223
metavar = "COUNT" ,
186
224
)
187
225
@click .option (
188
- "-w" , "--where" , default = None , help = "An additional 'where' expression to restrict the search space. Beware of SQL Injection!" , metavar = "EXPR"
226
+ "-w" ,
227
+ "--where" ,
228
+ default = None ,
229
+ help = "An additional 'where' expression to restrict the search space. Beware of SQL Injection!" ,
230
+ metavar = "EXPR" ,
231
+ )
232
+ @click .option (
233
+ "-a" ,
234
+ "--algorithm" ,
235
+ default = Algorithm .AUTO .value ,
236
+ type = click .Choice ([i .value for i in Algorithm ]),
189
237
)
190
- @click .option ("-a" , "--algorithm" , default = Algorithm .AUTO .value , type = click .Choice ([i .value for i in Algorithm ]))
191
238
@click .option (
192
239
"--conf" ,
193
240
default = None ,
@@ -200,24 +247,74 @@ def write_usage(self, prog: str, args: str = "", prefix: Optional[str] = None) -
200
247
help = "Name of run-configuration to run. If used, CLI arguments for database and table must be omitted." ,
201
248
metavar = "NAME" ,
202
249
)
250
+ @click .option (
251
+ "--dbt" ,
252
+ is_flag = True ,
253
+ help = "Run dbt diff." ,
254
+ )
255
+ @click .option (
256
+ "--dbt-cloud" ,
257
+ is_flag = True ,
258
+ help = "Run dbt diff on Datafold cloud. Expects an api key on env var DATAFOLD_API_KEY." ,
259
+ )
260
+ @click .option (
261
+ "--dbt-profiles-dir" ,
262
+ default = None ,
263
+ metavar = "PATH" ,
264
+ help = "Override the default dbt profile location (~/.dbt)." ,
265
+ )
266
+ @click .option (
267
+ "--dbt-project-dir" ,
268
+ default = None ,
269
+ metavar = "PATH" ,
270
+ help = "Override the dbt project directory. Otherwise assumed to be the current directory." ,
271
+ )
203
272
def main (conf , run , ** kw ):
204
273
if kw ["table2" ] is None and kw ["database2" ]:
205
274
# Use the "database table table" form
206
275
kw ["table2" ] = kw ["database2" ]
207
276
kw ["database2" ] = kw ["database1" ]
208
277
278
+ if kw ["version" ]:
279
+ print (f"v{ __version__ } " )
280
+ return
281
+
209
282
if conf :
210
283
kw = apply_config_from_file (conf , run , kw )
211
284
285
+ if kw ["no_tracking" ]:
286
+ disable_tracking ()
287
+
288
+ if kw .get ("interactive" ):
289
+ kw ["debug" ] = True
290
+
291
+ if kw ["debug" ]:
292
+ logging .basicConfig (level = logging .DEBUG , format = LOG_FORMAT , datefmt = DATE_FORMAT )
293
+ if kw .get ("__conf__" ):
294
+ kw ["__conf__" ] = deepcopy (kw ["__conf__" ])
295
+ _remove_passwords_in_dict (kw ["__conf__" ])
296
+ logging .debug (f"Applied run configuration: { kw ['__conf__' ]} " )
297
+ elif kw .get ("verbose" ):
298
+ logging .basicConfig (level = logging .INFO , format = LOG_FORMAT , datefmt = DATE_FORMAT )
299
+ else :
300
+ logging .basicConfig (level = logging .WARNING , format = LOG_FORMAT , datefmt = DATE_FORMAT )
301
+
212
302
try :
213
- return _main (** kw )
303
+ if kw ["dbt" ] or kw ["dbt_cloud" ]:
304
+ DbtDiffer .diff (
305
+ profiles_dir_override = kw ["dbt_profiles_dir" ],
306
+ project_dir_override = kw ["dbt_project_dir" ],
307
+ is_cloud = kw ["dbt_cloud" ],
308
+ )
309
+ else :
310
+ return _data_diff (** kw )
214
311
except Exception as e :
215
312
logging .error (e )
216
313
if kw ["debug" ]:
217
314
raise
218
315
219
316
220
- def _main (
317
+ def _data_diff (
221
318
database1 ,
222
319
table1 ,
223
320
database2 ,
@@ -246,31 +343,14 @@ def _main(
246
343
materialize_all_rows ,
247
344
table_write_limit ,
248
345
materialize_to_table ,
346
+ dbt ,
347
+ dbt_cloud ,
348
+ dbt_profiles_dir ,
349
+ dbt_project_dir ,
249
350
threads1 = None ,
250
351
threads2 = None ,
251
352
__conf__ = None ,
252
353
):
253
- if version :
254
- print (f"v{ __version__ } " )
255
- return
256
-
257
- if no_tracking :
258
- disable_tracking ()
259
-
260
- if interactive :
261
- debug = True
262
-
263
- if debug :
264
- logging .basicConfig (level = logging .DEBUG , format = LOG_FORMAT , datefmt = DATE_FORMAT )
265
- if __conf__ :
266
- __conf__ = deepcopy (__conf__ )
267
- _remove_passwords_in_dict (__conf__ )
268
- logging .debug (f"Applied run configuration: { __conf__ } " )
269
- elif verbose :
270
- logging .basicConfig (level = logging .INFO , format = LOG_FORMAT , datefmt = DATE_FORMAT )
271
- else :
272
- logging .basicConfig (level = logging .WARNING , format = LOG_FORMAT , datefmt = DATE_FORMAT )
273
-
274
354
if limit and stats :
275
355
logging .error ("Cannot specify a limit when using the -s/--stats switch" )
276
356
return
0 commit comments