Skip to content

Commit

Permalink
Merge pull request #4 from fishtown-analytics/feature/compare-queries
Browse files Browse the repository at this point in the history
Add compare_queries macro
  • Loading branch information
Claire Carroll authored Jul 3, 2019
2 parents 2559376 + 85848f1 commit f2e45a5
Show file tree
Hide file tree
Showing 5 changed files with 145 additions and 82 deletions.
35 changes: 34 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,40 @@ Arguments:
* `primary_key` (optional): The primary key of the model. Used to sort unmatched
results for row-by-row validation.

## compare_queries ([source](macros/compare_queries.sql))
Similar to `compare_relations`, except that it takes two select statements instead of two relations. This macro is useful when:
* You need to filter out records from one of the relations.
* You need to rename or recast some columns to get them to match up.
* You only want to compare a small number of columns, so it's easier to write the columns you want to compare, rather than the columns you want to exclude.

```sql
{# in dbt Develop #}

{# Legacy side: `id` is aliased to `order_id` so the column names line up with the new query. #}
{% set old_fct_orders_query %}
select
id as order_id,
amount,
customer_id
from old_etl_schema.fct_orders
{% endset %}

{# New side: the same three columns, selected from the `fct_orders` dbt model. #}
{% set new_fct_orders_query %}
select
order_id,
amount,
customer_id
from {{ ref('fct_orders') }}
{% endset %}

{# Pass both select statements to the macro, which renders the comparison SQL. #}
{{ audit_helper.compare_queries(
a_query=old_fct_orders_query,
b_query=new_fct_orders_query,
primary_key="order_id"
) }}


```

# To-do:
* Macro to check if two models have the same structure
* Macro to check if two schemas contain the same relations
* Extend `compare_relations` macro to handle edge cases
13 changes: 13 additions & 0 deletions integration_tests/models/compare_queries.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
{# Integration-test model: wraps the two `data_compare_relations__*` relations
   in plain select statements and hands them to `compare_queries`. #}
{% set relation_a_sql %}
    select * from {{ ref('data_compare_relations__a_relation') }}
{% endset %}

{% set relation_b_sql %}
    select * from {{ ref('data_compare_relations__b_relation') }}
{% endset %}

{{
    audit_helper.compare_queries(
        a_query=relation_a_sql,
        b_query=relation_b_sql,
        primary_key="order_id"
    )
}}
5 changes: 5 additions & 0 deletions integration_tests/models/schema.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
version: 2

models:
- name: compare_queries
tests:
- dbt_utils.equality:
compare_model: ref('expected_results__compare_relations_without_exclude')

- name: compare_relations_with_exclude
tests:
- dbt_utils.equality:
Expand Down
82 changes: 82 additions & 0 deletions macros/compare_queries.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
{#
    compare_queries

    Renders a query that compares the result sets of two select statements.

    Arguments:
      a_query      Select statement for side "a".
      b_query      Select statement for side "b". Both statements are combined
                   with set operators, so they must return compatible columns.
      primary_key  (optional) Column (or SQL expression) used as the leading
                   sort key of the row-level output. Only used when
                   `summarize` is false.
      summarize    (optional, default True) When true, return one row per
                   (in_a, in_b) combination with a record count. When false,
                   return the individual records that are not present on both
                   sides, flagged with in_a / in_b.

    NOTE(review): `dbt_utils.intersect()` and `dbt_utils.except()` are assumed
    to render the warehouse-specific set-operator keywords; confirm against
    the installed dbt_utils version.
#}
{% macro compare_queries(a_query, b_query, primary_key=None, summarize=True) %}

with a as (

    {{ a_query }}

),

b as (

    {{ b_query }}

),

-- records returned by both queries
a_intersect_b as (

    select * from a
    {{ dbt_utils.intersect() }}
    select * from b

),

-- records returned only by a_query
a_except_b as (

    select * from a
    {{ dbt_utils.except() }}
    select * from b

),

-- records returned only by b_query
b_except_a as (

    select * from b
    {{ dbt_utils.except() }}
    select * from a

),

-- every record, flagged with the side(s) it was found on
all_records as (

    select
        *,
        true as in_a,
        true as in_b
    from a_intersect_b

    union all

    select
        *,
        true as in_a,
        false as in_b
    from a_except_b

    union all

    select
        *,
        false as in_a,
        true as in_b
    from b_except_a

),

-- record count per (in_a, in_b) combination
summary_stats as (

    select
        in_a,
        in_b,
        count(*) as count
    from all_records
    group by in_a, in_b

)

{% if summarize %}

select * from summary_stats
order by in_a desc, in_b desc

{% else %}

-- row-level view: only the records that are missing from one of the sides
select * from all_records
where not (in_a and in_b)
order by {% if primary_key is not none %}{{ primary_key }}, {% endif %}in_a desc, in_b desc

{% endif %}

{% endmacro %}
92 changes: 11 additions & 81 deletions macros/compare_relations.sql
Original file line number Diff line number Diff line change
Expand Up @@ -21,90 +21,20 @@

{% set check_cols_csv = check_columns | map(attribute='quoted') | join(', ') %}

{% set a_query %}
select
{{ check_cols_csv }}

with a as (
from {{ a_relation }}
{% endset %}

select
{{ check_cols_csv }}
{% set b_query %}
select
{{ check_cols_csv }}

from {{ a_relation }}
from {{ b_relation }}
{% endset %}

),

b as (

select
{{ check_cols_csv }}

from {{ b_relation }}

),

a_intersect_b as (

select * from a
{{ dbt_utils.intersect() }}
select * from b

),

a_except_b as (

select * from a
{{ dbt_utils.except() }}
select * from b

),

b_except_a as (

select * from b
{{ dbt_utils.except() }}
select * from a

),

all_records as (

select
*,
true as in_a,
true as in_b
from a_intersect_b

union all

select
*,
true as in_a,
false as in_b
from a_except_b

union all

select
*,
false as in_a,
true as in_b
from b_except_a

),

summary_stats as (
select
in_a,
in_b,
count(*) as count
from all_records

group by 1, 2
)
-- select * from all_records
-- where not (in_a and in_b)
-- order by {{ primary_key ~ ", " if primary_key is not none }} in_a desc, in_b desc

select * from summary_stats

order by in_a desc, in_b desc
{{ audit_helper.compare_queries(a_query, b_query) }}

{% endmacro %}

0 comments on commit f2e45a5

Please sign in to comment.