Merge pull request dbt-labs#57 from dbt-labs/default-compare-relation-cols

joellabes · web-flow · commit 470b72aba24c · 2023-01-26T16:26:55.000+13:00
Create default implementation of get_columns_in_relation_sql
diff --git a/.circleci/config.yml b/.circleci/config.yml
@@ -17,7 +17,7 @@ jobs:
       - checkout
 
       - run:
-          run: setup_creds
+          name: setup_creds
           command: |
             echo $BIGQUERY_SERVICE_ACCOUNT_JSON > ${HOME}/bigquery-service-key.json
 
@@ -91,7 +91,7 @@ jobs:
             dbt deps --target bigquery
             dbt seed --target bigquery --full-refresh
             dbt compile --target bigquery
-            dbt run --target bigquery
+            dbt run --target bigquery --full-refresh
             dbt test --target bigquery
 
 
@@ -100,6 +100,12 @@ jobs:
           paths:
             - "dbt_venv"
 
+            
+      - store_artifacts:
+          path: integration_tests/logs
+      - store_artifacts:
+          path: integration_tests/target
+
 workflows:
   version: 2
   test-all:
diff --git a/README.md b/README.md
@@ -221,6 +221,8 @@ For example, in the above result set, we can see that `status` and `amount` have
 switched order. Further, `order_date` is a timestamp in our "a" relation, whereas
 it is a date in our "b" relation.
 
+Note: For adapters other than BigQuery, Postgres, and Snowflake, the `ordinal_position` is inferred from the order of the columns returned by dbt Core's `adapter.get_columns_in_relation()`, as opposed to being loaded from the information schema. This includes Redshift, where the results of an information schema query cannot be stored in a table or view.
+
 ```sql
 {#- in dbt Develop -#}
 
diff --git a/integration_tests/models/compare_relation_columns.sql b/integration_tests/models/compare_relation_columns.sql
@@ -0,0 +1,17 @@
+-- Integration test model: runs audit_helper.compare_relation_columns against the two seed relations and normalises the result columns for comparison.
+with audit_helper_results as (
+    {{ audit_helper.compare_relation_columns(
+        a_relation=ref('data_compare_relation_columns_a'),
+        b_relation=ref('data_compare_relation_columns_b')
+    ) }}
+)
+
+select 
+    --These need to be cast; otherwise they are technically typed as "sql_identifier" or "cardinal_number" on Redshift. On Snowflake the column name is also lower-cased.
+    {{ "lower(" if target.type == 'snowflake' }} cast(column_name as {{ dbt.type_string() }}) {{ ")" if target.type == 'snowflake' }} as column_name, 
+    cast(a_ordinal_position as {{ dbt.type_int() }}) as a_ordinal_position,
+    cast(b_ordinal_position as {{ dbt.type_int() }}) as b_ordinal_position,
+    --Not checking the specific data types: as long as they match/don't match as expected, the audit behaviour is still being checked.
+    has_ordinal_position_match,
+    has_data_type_match
+from audit_helper_results
diff --git a/integration_tests/models/schema.yml b/integration_tests/models/schema.yml
@@ -55,4 +55,9 @@ models:
   - name: compare_all_columns_where_clause
     tests:
       - dbt_utils.equality:
-          compare_model: ref('expected_results__compare_all_columns_where_clause')
+          compare_model: ref('expected_results__compare_all_columns_where_clause')
+
+  - name: compare_relation_columns
+    tests:
+      - dbt_utils.equality:
+          compare_model: ref('expected_results__compare_relation_columns')
diff --git a/integration_tests/seeds/data_compare_relation_columns_a.csv b/integration_tests/seeds/data_compare_relation_columns_a.csv
@@ -0,0 +1,2 @@
+awesome_column,zany_column,brave_column,young_column,cool_column,xcellent_column
+testing_is_fun,2022-02-22,1234,9.8765,false,2020-01-01T21:08:17
diff --git a/integration_tests/seeds/data_compare_relation_columns_b.csv b/integration_tests/seeds/data_compare_relation_columns_b.csv
@@ -0,0 +1,2 @@
+magnificent_column,zany_column,brave_column,young_column,cool_column,xpeditionary_column,awesome_column
+2022-02-22,my_string_here,1234,9.8765,true,2020-01-01T21:08:17,testing_is_fun
diff --git a/integration_tests/seeds/expected_results__compare_relation_columns.csv b/integration_tests/seeds/expected_results__compare_relation_columns.csv
@@ -0,0 +1,9 @@
+COLUMN_NAME,A_ORDINAL_POSITION,B_ORDINAL_POSITION,HAS_ORDINAL_POSITION_MATCH,HAS_DATA_TYPE_MATCH
+awesome_column,1,7,false,true
+magnificent_column,,1,false,false
+zany_column,2,2,true,false
+brave_column,3,3,true,true
+young_column,4,4,true,true
+cool_column,5,5,true,true
+xpeditionary_column,,6,false,false
+xcellent_column,6,,false,false
diff --git a/macros/compare_relation_columns.sql b/macros/compare_relation_columns.sql
@@ -33,79 +33,29 @@ order by coalesce(a_cols.ordinal_position, b_cols.ordinal_position)
 
 {% endmacro %}
 
-{% macro redshift__get_columns_in_relation_sql(relation) %}
-{#-
-See https://github.com/dbt-labs/dbt/blob/23484b18b71010f701b5312f920f04529ceaa6b2/plugins/redshift/dbt/include/redshift/macros/adapters.sql#L71
-Edited to include ordinal_position
--#}
-with bound_views as (
-  select
-    ordinal_position,
-    table_schema,
-    column_name,
-    data_type,
-    character_maximum_length,
-    numeric_precision,
-    numeric_scale
-
-  from information_schema."columns"
-  where table_name = '{{ relation.identifier }}'
-),
+{% macro default__get_columns_in_relation_sql(relation) %}
+    
+  {% set columns = adapter.get_columns_in_relation(relation) %}
+  {% for column in columns %}
+    select 
+      {{ dbt.string_literal(column.name) }} as column_name, 
+      {{ loop.index }} as ordinal_position,
+      {{ dbt.string_literal(column.data_type) }} as data_type
 
-unbound_views as (
-select
-  ordinal_position,
-  view_schema,
-  col_name,
-  case
-    when col_type ilike 'character varying%' then
-      'character varying'
-    when col_type ilike 'numeric%' then 'numeric'
-    else col_type
-  end as col_type,
-  case
-    when col_type like 'character%'
-    then nullif(REGEXP_SUBSTR(col_type, '[0-9]+'), '')::int
-    else null
-  end as character_maximum_length,
-  case
-    when col_type like 'numeric%'
-    then nullif(
-      SPLIT_PART(REGEXP_SUBSTR(col_type, '[0-9,]+'), ',', 1),
-      '')::int
-    else null
-  end as numeric_precision,
-  case
-    when col_type like 'numeric%'
-    then nullif(
-      SPLIT_PART(REGEXP_SUBSTR(col_type, '[0-9,]+'), ',', 2),
-      '')::int
-    else null
-  end as numeric_scale
-
-from pg_get_late_binding_view_cols()
-cols(view_schema name, view_name name, col_name name,
-     col_type varchar, ordinal_position int)
-where view_name = '{{ relation.identifier }}'
-),
+  {% if not loop.last -%}
+    union all 
+  {%- endif %}
+  {% endfor %}
 
-unioned as (
-select * from bound_views
-union all
-select * from unbound_views
-)
-
-select
-*
 
-from unioned
-{% if relation.schema %}
-where table_schema = '{{ relation.schema }}'
-{% endif %}
-order by ordinal_position
+{% endmacro %}
 
+{% macro redshift__get_columns_in_relation_sql(relation) %}
+  {# Redshift can't store the results of an info schema query in a table/view, because that data only lives on the leader node, so delegate to the default implementation instead. #}
+  {{ return (audit_helper.default__get_columns_in_relation_sql(relation)) }}
 {% endmacro %}
 
+
 {% macro snowflake__get_columns_in_relation_sql(relation) %}
 {#-
 From: https://github.com/dbt-labs/dbt/blob/dev/louisa-may-alcott/plugins/snowflake/dbt/include/snowflake/macros/adapters.sql#L48
@@ -132,6 +82,7 @@ Edited to include ordinal_position
   order by ordinal_position
 {% endmacro %}
 
+
 {% macro postgres__get_columns_in_relation_sql(relation) %}
 {#-
 From: https://github.com/dbt-labs/dbt/blob/23484b18b71010f701b5312f920f04529ceaa6b2/plugins/postgres/dbt/include/postgres/macros/adapters.sql#L32

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,2 @@`
	`1`	`+awesome_column,zany_column,brave_column,young_column,cool_column,xcellent_column`
	`2`	`+testing_is_fun,2022-02-22,1234,9.8765,false,2020-01-01T21:08:17`
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,2 @@`
	`1`	`+magnificent_column,zany_column,brave_column,young_column,cool_column,xpeditionary_column,awesome_column`
	`2`	`+2022-02-22,my_string_here,1234,9.8765,true,2020-01-01T21:08:17,testing_is_fun`