forked from dbt-labs/dbt-audit-helper
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcompare_column_values.sql
66 lines (55 loc) · 2.52 KB
/
compare_column_values.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
{% macro compare_column_values(a_query, b_query, primary_key, column_to_compare) -%}
{{ return(adapter.dispatch('compare_column_values', 'audit_helper')(a_query, b_query, primary_key, column_to_compare)) }}
{%- endmacro %}
{% macro default__compare_column_values(a_query, b_query, primary_key, column_to_compare) -%}
with a_query as (
{{ a_query }}
),
b_query as (
{{ b_query }}
),
joined as (
select
coalesce(a_query.{{ primary_key }}, b_query.{{ primary_key }}) as {{ primary_key }},
a_query.{{ column_to_compare }} as a_query_value,
b_query.{{ column_to_compare }} as b_query_value,
case
when a_query.{{ column_to_compare }} = b_query.{{ column_to_compare }} then '✅: perfect match'
when a_query.{{ column_to_compare }} is null and b_query.{{ column_to_compare }} is null then '✅: both are null'
when a_query.{{ primary_key }} is null then '🤷: missing from a'
when b_query.{{ primary_key }} is null then '🤷: missing from b'
when a_query.{{ column_to_compare }} is null then '🤷: value is null in a only'
when b_query.{{ column_to_compare }} is null then '🤷: value is null in b only'
when a_query.{{ column_to_compare }} != b_query.{{ column_to_compare }} then '🙅: values do not match'
else 'unknown' -- this should never happen
end as match_status,
case
when a_query.{{ column_to_compare }} = b_query.{{ column_to_compare }} then 0
when a_query.{{ column_to_compare }} is null and b_query.{{ column_to_compare }} is null then 1
when a_query.{{ primary_key }} is null then 2
when b_query.{{ primary_key }} is null then 3
when a_query.{{ column_to_compare }} is null then 4
when b_query.{{ column_to_compare }} is null then 5
when a_query.{{ column_to_compare }} != b_query.{{ column_to_compare }} then 6
else 7 -- this should never happen
end as match_order
from a_query
full outer join b_query on a_query.{{ primary_key }} = b_query.{{ primary_key }}
),
aggregated as (
select
'{{ column_to_compare }}' as column_name,
match_status,
match_order,
count(*) as count_records
from joined
group by column_name, match_status, match_order
)
select
column_name,
match_status,
count_records,
round(100.0 * count_records / sum(count_records) over (), 2) as percent_of_total
from aggregated
order by match_order
{% endmacro %}