Skip to content

Commit

Permalink
Add support for Trino
Browse files Browse the repository at this point in the history
  • Loading branch information
cbhagl committed Aug 22, 2022
1 parent 5f29f01 commit c8a9532
Show file tree
Hide file tree
Showing 11 changed files with 209 additions and 1 deletion.
14 changes: 13 additions & 1 deletion .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,18 @@ orbs:
azure-cli: circleci/azure-cli@1.1.0

jobs:


integration-trino:
docker:
- image: cimg/python:3.9.9
steps:
- checkout
- run:
name: "Run Tests - Trino"
command: ./run_test.sh trino
- store_artifacts:
path: ./logs

integration-redshift:
docker:
- image: cimg/python:3.9.9
Expand Down Expand Up @@ -111,6 +122,7 @@ workflows:
- integration-snowflake
- integration-bigquery
- integration-databricks
- integration-trino
#- integration-synapse
#- integration-azuresql:
# requires:
Expand Down
11 changes: 11 additions & 0 deletions integration_tests/ci/sample.profiles.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,17 @@ integration_tests:
target: postgres
outputs:

trino:
type: trino
method: none
user: "{{ env_var('TRINO_TEST_USER') }}"
password: "{{ env_var('TRINO_TEST_PASSWORD') }}"
host: "{{ env_var('TRINO_TEST_HOST') }}"
port: "{{ env_var('TRINO_TEST_PORT') | as_number }}"
database: "{{ env_var('TRINO_TEST_DBNAME') }}"
schema: dbt_external_tables_integration_tests_trino
threads: 1

redshift:
type: redshift
host: "{{ env_var('REDSHIFT_TEST_HOST') }}"
Expand Down
4 changes: 4 additions & 0 deletions integration_tests/dbt_project.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ seeds:
sources:
dbt_external_tables_integration_tests:
plugins:
trino:
+enabled: "{{ target.type == 'trino' }}"
redshift:
+enabled: "{{ target.type == 'redshift' }}"
snowflake:
Expand All @@ -43,6 +45,8 @@ sources:
tests:
dbt_external_tables_integration_tests:
plugins:
trino:
+enabled: "{{ target.type == 'trino' }}"
redshift:
+enabled: "{{ target.type == 'redshift' }}"
snowflake:
Expand Down
19 changes: 19 additions & 0 deletions integration_tests/macros/plugins/trino/prep_external.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
{% macro trino__prep_external() %}

{% set external_schema = target.schema %}
{% set external_schema_location = 's3://test_bucket/external_schema.db' %}

{% set create_external_schema %}

create schema if not exists
hive.{{ external_schema }}
with (
location = {{ external_schema_location }}
)

{% endset %}

{% do log('Creating external schema ' ~ external_schema, info = true) %}
{% do run_query(create_external_schema) %}

{% endmacro %}
54 changes: 54 additions & 0 deletions integration_tests/models/plugins/trino/trino_external.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
version: 2

sources:
- name: trino_external
schema: "{{ target.schema }}"
tables:
- name: people_csv_unpartitioned
external: &csv-people
location: "s3://dbt-external-tables-testing/csv/"
file_format: CSV
table_properties:
skip_header_line_count: 1
columns: &cols-of-the-people
- name: id
data_type: int
- name: first_name
data_type: varchar(64)
- name: last_name
data_type: varchar(64)
- name: email
data_type: varchar(64)
tests: &equal-to-the-people
- dbt_utils.equality:
compare_model: ref('people')
compare_columns:
- id
- first_name
- last_name
- email

- name: people_csv_partitioned
external:
<<: *csv-people
partitions: &parts-of-the-people
- name: section
data_type: varchar(1)
columns: *cols-of-the-people
tests: *equal-to-the-people

# ensure that all partitions are created
- name: people_csv_multipartitioned
external:
<<: *csv-people
location: "s3://dbt-external-tables-testing/"
partitions:
- name: file_format
data_type: varchar(4)
- name: section
data_type: varchar(1)
- name: some_date
data_type: date
- name: file_name
data_type: varchar(10)
columns: *cols-of-the-people
41 changes: 41 additions & 0 deletions macros/plugins/trino/create_external_table.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
{% macro trino__create_external_table(source_node) %}
{%- set columns = source_node.columns.values() -%}
{%- set external = source_node.external -%}

create table {{source(source_node.source_name, source_node.name)}} (

{% for column in columns %}
{{column.name}} {{column.data_type}} {{- ',' if not loop.last -}}
{% endfor %}
{%- if external.partitions %} {{- ',' }}
{% for partition in external.partitions -%}
{{ partition.name }} {{ partition.data_type }} {{- ',' if not loop.last }}
{% endfor %}
{%- endif %}

)
{% if external.comment -%} comment '{{external.comment}}' {%- endif %}
with (

external_location = '{{external.location}}'

{%- if external.file_format %} {{- ',' }}
format = '{{external.file_format}}'
{%- endif -%}

{%- if external.partitions %} {{- ',' }}
partitioned_by = ARRAY[
{%- for partition in external.partitions -%}
'{{ partition.name }}' {{- ', ' if not loop.last }}
{%- endfor -%}
]
{%- endif -%}

{%- if external.table_properties %} {{- ',' }}
{% for name, value in external.table_properties.items() -%}
{{ name }}={{ '{!r}'.format(value) }} {{- ',' if not loop.last }}
{% endfor -%}
{%- endif %}

)
{% endmacro %}
19 changes: 19 additions & 0 deletions macros/plugins/trino/get_external_build_plan.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
{% macro trino__get_external_build_plan(source_node) %}
{% set build_plan = [] %}
{% set old_relation = adapter.get_relation(
database = source_node.database,
schema = source_node.schema,
identifier = source_node.identifier
) %}
{% set create_or_replace = (old_relation is none or var('ext_full_refresh', false)) %}
{% if create_or_replace %}
{% set build_plan = [
dbt_external_tables.dropif(source_node),
dbt_external_tables.create_external_table(source_node)
] + dbt_external_tables.refresh_external_table(source_node)
%}
{% else %}
{% set build_plan = dbt_external_tables.refresh_external_table(source_node) %}
{% endif %}
{% do return(build_plan) %}
{% endmacro %}
6 changes: 6 additions & 0 deletions macros/plugins/trino/helpers/dropif.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{% macro trino__dropif(node) %}
{% set ddl %}
drop table if exists {{source(node.source_name, node.name)}}
{% endset %}
{{return(ddl)}}
{% endmacro %}
13 changes: 13 additions & 0 deletions macros/plugins/trino/refresh_external_tables.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
{% macro trino__refresh_external_table(source_node) %}
{%- set partitions = source_node.external.partitions -%}
{%- if partitions -%}
{% set drop_partitions -%}
call system.sync_partition_metadata('{{ source_node.source_name }}', '{{ source_node.name }}', 'DROP', false)
{%- endset %}
{% set add_partitions -%}
call system.sync_partition_metadata('{{ source_node.source_name }}', '{{ source_node.name }}', 'ADD', false)
{%- endset %}
{{ return([drop_partitions, add_partitions]) }}
{% endif %}
{% do return([]) %}
{% endmacro %}
4 changes: 4 additions & 0 deletions run_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@ if [[ ! -f $VENV ]]; then
then
echo "Installing dbt-spark"
pip install dbt-spark[ODBC] --upgrade --pre
elif [ $1 == 'trino' ]
then
echo "Installing dbt-trino"
pip install dbt-trino --upgrade --pre
elif [ $1 == 'azuresql' ]
then
echo "Installing dbt-sqlserver"
Expand Down
25 changes: 25 additions & 0 deletions sample_sources/trino.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
version: 2

sources:
- name: trino
tables:
- name: snowplow
description: "Snowplow events stored as CSV files in HDFS"
external:
location: "s3a://.../event.csv" # s3://, hdfs://, ...
file_format: CSV # file format: CSV, ORC, PARQUET, ...
table_properties: # as needed
skip_header_line_count: 1
csv_separator: ";"
csv_quote: '"'

columns:
- name: app_id
data_type: varchar
description: "Application ID"
- name: domain_sessionidx
data_type: int
description: "A visit / session index"
- name: etl_tstamp
data_type: timestamp
description: "Timestamp event began ETL"

0 comments on commit c8a9532

Please sign in to comment.