@@ -80,6 +80,7 @@ class DiffStats:
80
80
table2_count : int
81
81
unchanged : int
82
82
diff_percent : float
83
+ extra_column_diffs : Optional [Dict [str , int ]]
83
84
84
85
85
86
@dataclass
@@ -95,17 +96,33 @@ def __iter__(self):
95
96
self .result_list .append (i )
96
97
yield i
97
98
98
- def _get_stats (self ) -> DiffStats :
99
+ def _get_stats (self , is_dbt : bool = False ) -> DiffStats :
99
100
list (self ) # Consume the iterator into result_list, if we haven't already
100
101
102
+ key_columns = self .info_tree .info .tables [0 ].key_columns
103
+ len_key_columns = len (key_columns )
101
104
diff_by_key = {}
105
+ extra_column_diffs = None
106
+ if is_dbt :
107
+ extra_column_values_store = {}
108
+ extra_columns = self .info_tree .info .tables [0 ].extra_columns
109
+ extra_column_diffs = {k : 0 for k in extra_columns }
110
+
102
111
for sign , values in self .result_list :
103
- k = values [: len (self .info_tree .info .tables [0 ].key_columns )]
112
+ k = values [:len_key_columns ]
113
+ if is_dbt :
114
+ extra_column_values = values [len_key_columns :]
104
115
if k in diff_by_key :
105
116
assert sign != diff_by_key [k ]
106
117
diff_by_key [k ] = "!"
118
+ if is_dbt :
119
+ for i in range (0 , len (extra_columns )):
120
+ if extra_column_values [i ] != extra_column_values_store [k ][i ]:
121
+ extra_column_diffs [extra_columns [i ]] += 1
107
122
else :
108
123
diff_by_key [k ] = sign
124
+ if is_dbt :
125
+ extra_column_values_store [k ] = extra_column_values
109
126
110
127
diff_by_sign = {k : 0 for k in "+-!" }
111
128
for sign in diff_by_key .values ():
@@ -116,23 +133,41 @@ def _get_stats(self) -> DiffStats:
116
133
unchanged = table1_count - diff_by_sign ["-" ] - diff_by_sign ["!" ]
117
134
diff_percent = 1 - unchanged / max (table1_count , table2_count )
118
135
119
- return DiffStats (diff_by_sign , table1_count , table2_count , unchanged , diff_percent )
136
+ return DiffStats (diff_by_sign , table1_count , table2_count , unchanged , diff_percent , extra_column_diffs )
120
137
121
def get_stats_string(self, is_dbt: bool = False):
    """Render the diff statistics as a human-readable report.

    Args:
        is_dbt: When true, return the compact dbt summary: an
            added/removed table, the updated and unchanged row counts,
            and one "column: count" line per extra column. When false,
            return the classic one-line-per-statistic report, followed
            by an "Extra-Info" section if ``self.stats`` is non-empty.

    Returns:
        The complete report as a single string.
    """
    diff_stats = self._get_stats(is_dbt)
    by_sign = diff_stats.diff_by_sign

    if is_dbt:
        rule = "------------------------------------------------------------"
        # NOTE(review): '-' (rows only in table A) is printed under
        # "Rows Added" and '+' under "Rows Removed" -- presumably table A
        # is the new/dev side in dbt mode; confirm against the caller.
        parts = [
            "\n| Rows Added\t| Rows Removed\n",
            f"{rule}\n",
            f"| {by_sign['-']}\t\t| {by_sign['+']}\n",
            f"{rule}\n\n",
            f"Updated Rows: {by_sign['!']}\n",
            f"Unchanged Rows: {diff_stats.unchanged}\n\n",
            "Values Updated:",
        ]
        # extra_column_diffs is Optional; treat a missing mapping as
        # "no per-column counts" instead of failing on None.items().
        column_diffs = diff_stats.extra_column_diffs or {}
        parts.extend(f"\n{column}: {count} " for column, count in column_diffs.items())
        return "".join(parts)

    parts = [
        f"{diff_stats.table1_count} rows in table A\n",
        f"{diff_stats.table2_count} rows in table B\n",
        f"{by_sign['-']} rows exclusive to table A (not present in B)\n",
        f"{by_sign['+']} rows exclusive to table B (not present in A)\n",
        f"{by_sign['!']} rows updated\n",
        f"{diff_stats.unchanged} rows unchanged\n",
        f"{100 * diff_stats.diff_percent:.2f}% difference score\n",
    ]

    # The extra info is only part of the classic (non-dbt) report.
    if self.stats:
        parts.append("\nExtra-Info:\n")
        parts.extend(f"  {key} = {value}\n" for key, value in sorted(self.stats.items()))

    return "".join(parts)
138
173
0 commit comments