Skip to content

Commit 470b72a

Browse files
authored
Merge pull request dbt-labs#57 from dbt-labs/default-compare-relation-cols
Create default implementation of get_columns_in_relation_sql
2 parents 5faf883 + ea3052d commit 470b72a

8 files changed

+64
-70
lines changed

.circleci/config.yml

+8-2
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ jobs:
1717
- checkout
1818

1919
- run:
20-
run: setup_creds
20+
name: setup_creds
2121
command: |
2222
echo $BIGQUERY_SERVICE_ACCOUNT_JSON > ${HOME}/bigquery-service-key.json
2323
@@ -91,7 +91,7 @@ jobs:
9191
dbt deps --target bigquery
9292
dbt seed --target bigquery --full-refresh
9393
dbt compile --target bigquery
94-
dbt run --target bigquery
94+
dbt run --target bigquery --full-refresh
9595
dbt test --target bigquery
9696
9797
@@ -100,6 +100,12 @@ jobs:
100100
paths:
101101
- "dbt_venv"
102102

103+
104+
- store_artifacts:
105+
path: integration_tests/logs
106+
- store_artifacts:
107+
path: integration_tests/target
108+
103109
workflows:
104110
version: 2
105111
test-all:

README.md

+2
Original file line numberDiff line numberDiff line change
@@ -221,6 +221,8 @@ For example, in the above result set, we can see that `status` and `amount` have
221221
switched order. Further, `order_date` is a timestamp in our "a" relation, whereas
222222
it is a date in our "b" relation.
223223

224+
Note: For adapters other than BigQuery, Postgres, and Snowflake (including Redshift, which delegates to the default implementation), `ordinal_position` is inferred from the order of the columns returned by dbt Core's `adapter.get_columns_in_relation()`, as opposed to being loaded from the information schema.
225+
224226
```sql
225227
{#- in dbt Develop -#}
226228

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
2+
with audit_helper_results as (
3+
{{ audit_helper.compare_relation_columns(
4+
a_relation=ref('data_compare_relation_columns_a'),
5+
b_relation=ref('data_compare_relation_columns_b')
6+
) }}
7+
)
8+
9+
select
10+
-- These need to be cast; otherwise they are technically typed as "sql_identifier" or "cardinal_number" on Redshift
11+
{{ "lower(" if target.type == 'snowflake' }} cast(column_name as {{ dbt.type_string() }}) {{ ")" if target.type == 'snowflake' }} as column_name,
12+
cast(a_ordinal_position as {{ dbt.type_int() }}) as a_ordinal_position,
13+
cast(b_ordinal_position as {{ dbt.type_int() }}) as b_ordinal_position,
14+
-- Not checking the specific data types: as long as they match/don't match as expected, that still checks the audit behaviour
15+
has_ordinal_position_match,
16+
has_data_type_match
17+
from audit_helper_results

integration_tests/models/schema.yml

+6-1
Original file line numberDiff line numberDiff line change
@@ -55,4 +55,9 @@ models:
5555
- name: compare_all_columns_where_clause
5656
tests:
5757
- dbt_utils.equality:
58-
compare_model: ref('expected_results__compare_all_columns_where_clause')
58+
compare_model: ref('expected_results__compare_all_columns_where_clause')
59+
60+
- name: compare_relation_columns
61+
tests:
62+
- dbt_utils.equality:
63+
compare_model: ref('expected_results__compare_relation_columns')
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
awesome_column,zany_column,brave_column,young_column,cool_column,xcellent_column
2+
testing_is_fun,2022-02-22,1234,9.8765,false,2020-01-01T21:08:17
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
magnificent_column,zany_column,brave_column,young_column,cool_column,xpeditionary_column,awesome_column
2+
2022-02-22,my_string_here,1234,9.8765,true,2020-01-01T21:08:17,testing_is_fun
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
COLUMN_NAME,A_ORDINAL_POSITION,B_ORDINAL_POSITION,HAS_ORDINAL_POSITION_MATCH,HAS_DATA_TYPE_MATCH
2+
awesome_column,1,7,false,true
3+
magnificent_column,,1,false,false
4+
zany_column,2,2,true,false
5+
brave_column,3,3,true,true
6+
young_column,4,4,true,true
7+
cool_column,5,5,true,true
8+
xpeditionary_column,,6,false,false
9+
xcellent_column,6,,false,false

macros/compare_relation_columns.sql

+18-67
Original file line numberDiff line numberDiff line change
@@ -33,79 +33,29 @@ order by coalesce(a_cols.ordinal_position, b_cols.ordinal_position)
3333

3434
{% endmacro %}
3535

36-
{% macro redshift__get_columns_in_relation_sql(relation) %}
37-
{#-
38-
See https://github.com/dbt-labs/dbt/blob/23484b18b71010f701b5312f920f04529ceaa6b2/plugins/redshift/dbt/include/redshift/macros/adapters.sql#L71
39-
Edited to include ordinal_position
40-
-#}
41-
with bound_views as (
42-
select
43-
ordinal_position,
44-
table_schema,
45-
column_name,
46-
data_type,
47-
character_maximum_length,
48-
numeric_precision,
49-
numeric_scale
50-
51-
from information_schema."columns"
52-
where table_name = '{{ relation.identifier }}'
53-
),
36+
{% macro default__get_columns_in_relation_sql(relation) %}
37+
38+
{% set columns = adapter.get_columns_in_relation(relation) %}
39+
{% for column in columns %}
40+
select
41+
{{ dbt.string_literal(column.name) }} as column_name,
42+
{{ loop.index }} as ordinal_position,
43+
{{ dbt.string_literal(column.data_type) }} as data_type
5444

55-
unbound_views as (
56-
select
57-
ordinal_position,
58-
view_schema,
59-
col_name,
60-
case
61-
when col_type ilike 'character varying%' then
62-
'character varying'
63-
when col_type ilike 'numeric%' then 'numeric'
64-
else col_type
65-
end as col_type,
66-
case
67-
when col_type like 'character%'
68-
then nullif(REGEXP_SUBSTR(col_type, '[0-9]+'), '')::int
69-
else null
70-
end as character_maximum_length,
71-
case
72-
when col_type like 'numeric%'
73-
then nullif(
74-
SPLIT_PART(REGEXP_SUBSTR(col_type, '[0-9,]+'), ',', 1),
75-
'')::int
76-
else null
77-
end as numeric_precision,
78-
case
79-
when col_type like 'numeric%'
80-
then nullif(
81-
SPLIT_PART(REGEXP_SUBSTR(col_type, '[0-9,]+'), ',', 2),
82-
'')::int
83-
else null
84-
end as numeric_scale
85-
86-
from pg_get_late_binding_view_cols()
87-
cols(view_schema name, view_name name, col_name name,
88-
col_type varchar, ordinal_position int)
89-
where view_name = '{{ relation.identifier }}'
90-
),
45+
{% if not loop.last -%}
46+
union all
47+
{%- endif %}
48+
{% endfor %}
9149

92-
unioned as (
93-
select * from bound_views
94-
union all
95-
select * from unbound_views
96-
)
97-
98-
select
99-
*
10050

101-
from unioned
102-
{% if relation.schema %}
103-
where table_schema = '{{ relation.schema }}'
104-
{% endif %}
105-
order by ordinal_position
51+
{% endmacro %}
10652

53+
{% macro redshift__get_columns_in_relation_sql(relation) %}
54+
{# You can't store the results of an info schema query to a table/view in Redshift, because the data only lives on the leader node #}
55+
{{ return (audit_helper.default__get_columns_in_relation_sql(relation)) }}
10756
{% endmacro %}
10857

58+
10959
{% macro snowflake__get_columns_in_relation_sql(relation) %}
11060
{#-
11161
From: https://github.com/dbt-labs/dbt/blob/dev/louisa-may-alcott/plugins/snowflake/dbt/include/snowflake/macros/adapters.sql#L48
@@ -132,6 +82,7 @@ Edited to include ordinal_position
13282
order by ordinal_position
13383
{% endmacro %}
13484

85+
13586
{% macro postgres__get_columns_in_relation_sql(relation) %}
13687
{#-
13788
From: https://github.com/dbt-labs/dbt/blob/23484b18b71010f701b5312f920f04529ceaa6b2/plugins/postgres/dbt/include/postgres/macros/adapters.sql#L32

0 commit comments

Comments
 (0)