diff --git a/README.md b/README.md index 6cd0b189..2913c75c 100644 --- a/README.md +++ b/README.md @@ -59,7 +59,40 @@ Arguments: * `primary_key` (optional): The primary key of the model. Used to sort unmatched results for row-by-row validation. +## compare_queries ([source](macros/compare_queries.sql)) +Super similar to `compare_relations`, except it takes two select statements. This macro is useful when: +* You need to filter out records from one of the relations. +* You need to rename or recast some columns to get them to match up. +* You only want to compare a small number of columns, so it's easier to write the columns you want to compare, rather than the columns you want to exclude. + +```sql +{# in dbt Develop #} + +{% set old_fct_orders_query %} + select + id as order_id, + amount, + customer_id + from old_etl_schema.fct_orders +{% endset %} + +{% set new_fct_orders_query %} + select + order_id, + amount, + customer_id + from {{ ref('fct_orders') }} +{% endset %} + +{{ audit_helper.compare_queries( + a_query=old_fct_orders_query, + b_query=new_fct_orders_query, + primary_key="order_id" +) }} + + +``` + # To-do: * Macro to check if two models have the same structure * Macro to check if two schemas contain the same relations -* Extend `compare_relations` macro to handle edge cases diff --git a/integration_tests/models/compare_queries.sql b/integration_tests/models/compare_queries.sql new file mode 100644 index 00000000..b1060587 --- /dev/null +++ b/integration_tests/models/compare_queries.sql @@ -0,0 +1,13 @@ +{% set a_query %} + select * from {{ ref('data_compare_relations__a_relation') }} +{% endset %} + +{% set b_query %} + select * from {{ ref('data_compare_relations__b_relation') }} +{% endset %} + +{{ audit_helper.compare_queries( + a_query=a_query, + b_query=b_query, + primary_key="order_id" +) }} diff --git a/integration_tests/models/schema.yml b/integration_tests/models/schema.yml index 2d54638f..1411b650 100644 --- a/integration_tests/models/schema.yml 
+++ b/integration_tests/models/schema.yml @@ -1,6 +1,11 @@ version: 2 models: + - name: compare_queries + tests: + - dbt_utils.equality: + compare_model: ref('expected_results__compare_relations_without_exclude') + - name: compare_relations_with_exclude tests: - dbt_utils.equality: diff --git a/macros/compare_queries.sql b/macros/compare_queries.sql new file mode 100644 index 00000000..64db9c37 --- /dev/null +++ b/macros/compare_queries.sql @@ -0,0 +1,82 @@ +{% macro compare_queries(a_query, b_query, primary_key=None) %} +{# Renders a query that compares the rows returned by a_query and b_query (two select statements). Rows are split into three sets (in both, only in a, only in b) and flagged with in_a / in_b; the final select returns summary_stats: one row per in_a / in_b combination with a record count, ordered by in_a desc, in_b desc. primary_key is only referenced by the commented-out row-level query near the end and does not change the summary. NOTE(review): the intersect / except steps presumably require both queries to return matching column lists; confirm against dbt_utils. #} +with a as ( + + {{ a_query }} + +), + +b as ( + + {{ b_query }} + +), + +a_intersect_b as ( + + select * from a + {{ dbt_utils.intersect() }} + select * from b + +), + +a_except_b as ( + + select * from a + {{ dbt_utils.except() }} + select * from b + +), + +b_except_a as ( + + select * from b + {{ dbt_utils.except() }} + select * from a + +), + +all_records as ( + + select + *, + true as in_a, + true as in_b + from a_intersect_b + + union all + + select + *, + true as in_a, + false as in_b + from a_except_b + + union all + + select + *, + false as in_a, + true as in_b + from b_except_a + +), + +summary_stats as ( + select + in_a, + in_b, + count(*) as count + from all_records + + group by 1, 2 +) +-- select * from all_records +-- where not (in_a and in_b) +-- order by {{ primary_key ~ ", " if primary_key is not none }} in_a desc, in_b desc + +select * from summary_stats + +order by in_a desc, in_b desc + +{% endmacro %} diff --git a/macros/compare_relations.sql b/macros/compare_relations.sql index e40af1ae..400797d0 100644 --- a/macros/compare_relations.sql +++ b/macros/compare_relations.sql @@ -21,90 +21,20 @@ {% set check_cols_csv = check_columns | map(attribute='quoted') | join(', ') %} +{% set a_query %} +select + {{ check_cols_csv }} -with a as ( +from {{ a_relation }} +{% endset %} - select - {{ check_cols_csv }} +{% set b_query %} +select + {{ check_cols_csv }} - from {{ a_relation }} +from {{ b_relation }} +{% endset %} -), - -b as ( - 
- select - {{ check_cols_csv }} - - from {{ b_relation }} - -), - -a_intersect_b as ( - - select * from a - {{ dbt_utils.intersect() }} - select * from b - -), - -a_except_b as ( - - select * from a - {{ dbt_utils.except() }} - select * from b - -), - -b_except_a as ( - - select * from b - {{ dbt_utils.except() }} - select * from a - -), - -all_records as ( - - select - *, - true as in_a, - true as in_b - from a_intersect_b - - union all - - select - *, - true as in_a, - false as in_b - from a_except_b - - union all - - select - *, - false as in_a, - true as in_b - from b_except_a - -), - -summary_stats as ( - select - in_a, - in_b, - count(*) as count - from all_records - - group by 1, 2 -) --- select * from all_records --- where not (in_a and in_b) --- order by {{ primary_key ~ ", " if primary_key is not none }} in_a desc, in_b desc - -select * from summary_stats - -order by in_a desc, in_b desc +{{ audit_helper.compare_queries(a_query, b_query) }} {% endmacro %}