Skip to content

Commit b4b3008

Browse files
authored
spruce up CLI output in dbt context (datafold#381)
* spruce up CLI output in dbt context
* refactor _get_stats
* clean up string_output
* make extra_column_diffs optional
* add is_dbt boolean
* add is_dbt var to _get_stats
* provide is_dbt argument to _get_stats and remove _get_stats_dbt
* add extra_column_diffs = None, add is_dbt to _get_stats
* change Identical to Unchanged
* create len_key_columns
* black -l 120
1 parent 608312d commit b4b3008

File tree

2 files changed

+54
-19
lines changed

2 files changed

+54
-19
lines changed

data_diff/dbt.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -161,7 +161,7 @@ def _local_diff(diff_vars: DiffVars) -> None:
161161
+ prod_qualified_string
162162
+ "[/] \n"
163163
+ column_diffs_str
164-
+ diff.get_stats_string()
164+
+ diff.get_stats_string(is_dbt=True)
165165
+ "\n"
166166
)
167167
else:

data_diff/diff_tables.py

Lines changed: 53 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,7 @@ class DiffStats:
8080
table2_count: int
8181
unchanged: int
8282
diff_percent: float
83+
extra_column_diffs: Optional[Dict[str, int]]
8384

8485

8586
@dataclass
@@ -95,17 +96,33 @@ def __iter__(self):
9596
self.result_list.append(i)
9697
yield i
9798

98-
def _get_stats(self) -> DiffStats:
99+
def _get_stats(self, is_dbt: bool = False) -> DiffStats:
99100
list(self) # Consume the iterator into result_list, if we haven't already
100101

102+
key_columns = self.info_tree.info.tables[0].key_columns
103+
len_key_columns = len(key_columns)
101104
diff_by_key = {}
105+
extra_column_diffs = None
106+
if is_dbt:
107+
extra_column_values_store = {}
108+
extra_columns = self.info_tree.info.tables[0].extra_columns
109+
extra_column_diffs = {k: 0 for k in extra_columns}
110+
102111
for sign, values in self.result_list:
103-
k = values[: len(self.info_tree.info.tables[0].key_columns)]
112+
k = values[:len_key_columns]
113+
if is_dbt:
114+
extra_column_values = values[len_key_columns:]
104115
if k in diff_by_key:
105116
assert sign != diff_by_key[k]
106117
diff_by_key[k] = "!"
118+
if is_dbt:
119+
for i in range(0, len(extra_columns)):
120+
if extra_column_values[i] != extra_column_values_store[k][i]:
121+
extra_column_diffs[extra_columns[i]] += 1
107122
else:
108123
diff_by_key[k] = sign
124+
if is_dbt:
125+
extra_column_values_store[k] = extra_column_values
109126

110127
diff_by_sign = {k: 0 for k in "+-!"}
111128
for sign in diff_by_key.values():
@@ -116,23 +133,41 @@ def _get_stats(self) -> DiffStats:
116133
unchanged = table1_count - diff_by_sign["-"] - diff_by_sign["!"]
117134
diff_percent = 1 - unchanged / max(table1_count, table2_count)
118135

119-
return DiffStats(diff_by_sign, table1_count, table2_count, unchanged, diff_percent)
136+
return DiffStats(diff_by_sign, table1_count, table2_count, unchanged, diff_percent, extra_column_diffs)
120137

121-
def get_stats_string(self):
122-
diff_stats = self._get_stats()
123-
string_output = ""
124-
string_output += f"{diff_stats.table1_count} rows in table A\n"
125-
string_output += f"{diff_stats.table2_count} rows in table B\n"
126-
string_output += f"{diff_stats.diff_by_sign['-']} rows exclusive to table A (not present in B)\n"
127-
string_output += f"{diff_stats.diff_by_sign['+']} rows exclusive to table B (not present in A)\n"
128-
string_output += f"{diff_stats.diff_by_sign['!']} rows updated\n"
129-
string_output += f"{diff_stats.unchanged} rows unchanged\n"
130-
string_output += f"{100*diff_stats.diff_percent:.2f}% difference score\n"
131-
132-
if self.stats:
133-
string_output += "\nExtra-Info:\n"
134-
for k, v in sorted(self.stats.items()):
135-
string_output += f" {k} = {v}\n"
138+
139+
def get_stats_string(self, is_dbt: bool = False):
140+
diff_stats = self._get_stats(is_dbt)
141+
142+
if is_dbt:
143+
string_output = "\n| Rows Added\t| Rows Removed\n"
144+
string_output += "------------------------------------------------------------\n"
145+
146+
string_output += f"| {diff_stats.diff_by_sign['-']}\t\t| {diff_stats.diff_by_sign['+']}\n"
147+
string_output += "------------------------------------------------------------\n\n"
148+
string_output += f"Updated Rows: {diff_stats.diff_by_sign['!']}\n"
149+
string_output += f"Unchanged Rows: {diff_stats.unchanged}\n\n"
150+
151+
string_output += f"Values Updated:"
152+
153+
for k, v in diff_stats.extra_column_diffs.items():
154+
string_output += f"\n{k}: {v}"
155+
156+
else:
157+
158+
string_output = ""
159+
string_output += f"{diff_stats.table1_count} rows in table A\n"
160+
string_output += f"{diff_stats.table2_count} rows in table B\n"
161+
string_output += f"{diff_stats.diff_by_sign['-']} rows exclusive to table A (not present in B)\n"
162+
string_output += f"{diff_stats.diff_by_sign['+']} rows exclusive to table B (not present in A)\n"
163+
string_output += f"{diff_stats.diff_by_sign['!']} rows updated\n"
164+
string_output += f"{diff_stats.unchanged} rows unchanged\n"
165+
string_output += f"{100*diff_stats.diff_percent:.2f}% difference score\n"
166+
167+
if self.stats:
168+
string_output += "\nExtra-Info:\n"
169+
for k, v in sorted(self.stats.items()):
170+
string_output += f" {k} = {v}\n"
136171

137172
return string_output
138173

0 commit comments

Comments
 (0)