Skip to content

Commit

Permalink
create option to skip json parsing (#20)
Browse files Browse the repository at this point in the history
This creates a new option `fullstory_skip_json_parse` that will skip the
parsing step when creating the events table. This is useful if your data
has already been transformed into JSON columns.
  • Loading branch information
huttotw authored Feb 8, 2024
1 parent a802375 commit 88cfcce
Show file tree
Hide file tree
Showing 8 changed files with 92 additions and 9 deletions.
20 changes: 20 additions & 0 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,26 @@ jobs:
dbt run --target bigquery --full-refresh
dbt test --target bigquery
# Integration test for already-parsed JSON input on BigQuery. After seeding,
# every dbt step runs against the `bigquery_v2` target with
# `fullstory_skip_json_parse: true`. The `bigquery_v2_events_shim` model is
# built on its own first, then the whole project is run and tested.
bigquery_v2_test:
  docker:
    - image: cimg/python:3.11.4
  steps:
    - setup_job

    - run:
        name: "Test - BigQuery V2"
        environment:
          BIGQUERY_SERVICE_KEY_PATH: "/home/circleci/bigquery-service-key.json"
        command: |
          . venv/bin/activate
          pwd
          cd integration_tests
          dbt seed --target bigquery_v2 --full-refresh
          dbt compile --target bigquery_v2 --vars '{"fullstory_skip_json_parse": true}'
          dbt run --target bigquery_v2 --full-refresh --select bigquery_v2_events_shim --vars '{"fullstory_skip_json_parse": true}'
          dbt run --target bigquery_v2 --full-refresh --vars '{"fullstory_skip_json_parse": true}'
          dbt test --target bigquery_v2 --vars '{"fullstory_skip_json_parse": true}'
snowflake_test:
docker:
- image: cimg/python:3.11.4
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ This dbt package contains models, macros, seeds, and tests for [FullStory](https
| fullstory_identified_users_model_name | The customized name of the `identified_users` model. |
| fullstory_identities_model_name | The customized name of the `identities` model. |
| fullstory_sessions_model_name | The customized name of the `sessions` model. |
| fullstory_skip_json_parse | Whether to skip the JSON parsing step when creating the events table. Useful if your data has already been transformed into JSON columns. Defaults to `False`. |
| fullstory_users_model_name | The customized name of the `users` model. |
| fullstory_min_event_time | All events before this date will not be considered for analysis. Use this option to limit table size. |
| fullstory_event_types | A list of event types to auto-generate rollups for in the `users` and `sessions` model. |
Expand Down
9 changes: 9 additions & 0 deletions integration_tests/ci/profiles.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,15 @@ dbt_fullstory_integration_tests:
threads: 4
priority: interactive

# BigQuery output selected with `--target bigquery_v2`. Authenticates with the
# service-account key file named by BIGQUERY_SERVICE_KEY_PATH, uses the GCP
# project from BIGQUERY_GCP_PROJECT, and builds into its own dataset.
bigquery_v2:
  type: bigquery
  method: service-account
  keyfile: "{{ env_var('BIGQUERY_SERVICE_KEY_PATH') }}"
  project: "{{ env_var('BIGQUERY_GCP_PROJECT') }}"
  dataset: "dbt_fullstory_testing_v2"
  threads: 4
  priority: interactive

snowflake:
type: snowflake
account: "{{ env_var('SNOWFLAKE_ACCOUNT') }}"
Expand Down
2 changes: 1 addition & 1 deletion integration_tests/models/_bigquery_events_shim.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@ version: 2
models:
  - name: bigquery_events_shim
    config:
      # Keyed on target.name rather than target.type: the `bigquery_v2` target
      # also has type `bigquery`, and this shim must stay disabled for it.
      enabled: "{{ (target.name == 'bigquery') | as_bool }}"
      alias: fullstory_events_integration_tests
7 changes: 7 additions & 0 deletions integration_tests/models/_bigquery_v2_events_shim.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Schema config for the BigQuery V2 events shim model.
version: 2

models:
  - name: bigquery_v2_events_shim
    config:
      # Only enabled when dbt is invoked with the `bigquery_v2` target.
      enabled: "{{ (target.name == 'bigquery_v2') | as_bool }}"
      alias: fullstory_events_integration_tests
13 changes: 13 additions & 0 deletions integration_tests/models/bigquery_v2_events_shim.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
-- Shim over the integration-test seed: converts the two properties columns
-- with parse_json() and passes every other column through unchanged.
with seeded_events as (

    select * from {{ ref('fullstory_events_integration_seeds') }}

)

select
    event_id,
    event_time,
    processed_time,
    updated_time,
    device_id,
    session_id,
    view_id,
    event_type,
    parse_json(seeded_events.event_properties) as event_properties,
    source_type,
    parse_json(seeded_events.source_properties) as source_properties
from seeded_events
2 changes: 1 addition & 1 deletion macros/parse_json_into_columns.sql
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{%- macro parse_json_into_columns(field, columns) -%}
{%- for column in columns -%}

{%- set inner = json_value(field, column.path, column.array, column.dtype) %}
{%- set inner = json_value(field, column.path, column.array, column.dtype, column.skip_parse) %}
{{ column.prefix -}}

{%- if column.cast_as -%}
Expand Down
47 changes: 40 additions & 7 deletions models/events.sql
Original file line number Diff line number Diff line change
Expand Up @@ -17,116 +17,139 @@ select
"name": "device_user_agent",
"path": "$.user_agent.raw_user_agent",
"cast_as": "string",
"skip_parse": var("fullstory_skip_json_parse", False),
},
{
"name": "device_type",
"path": "$.user_agent.device",
"cast_as": "string",
"skip_parse": var("fullstory_skip_json_parse", False),
},
{
"name": "device_operating_system",
"path": "$.user_agent.operating_system",
"cast_as": "string",
"skip_parse": var("fullstory_skip_json_parse", False),
},
{
"name": "device_browser",
"path": "$.user_agent.browser",
"cast_as": "string",
"skip_parse": var("fullstory_skip_json_parse", False),
},
{
"name": "device_browser_version",
"path": "$.user_agent.browser_version",
"cast_as": "string",
"skip_parse": var("fullstory_skip_json_parse", False),
},
{
"name": "geo_ip_address",
"path": "$.location.ip_address",
"cast_as": "string",
"skip_parse": var("fullstory_skip_json_parse", False),
},
{
"name": "geo_country",
"path": "$.location.country",
"cast_as": "string",
"skip_parse": var("fullstory_skip_json_parse", False),
},
{
"name": "geo_region",
"path": "$.location.region",
"cast_as": "string",
"skip_parse": var("fullstory_skip_json_parse", False),
},
{
"name": "geo_city",
"path": "$.location.city",
"cast_as": "string"
"cast_as": "string",
"skip_parse": var("fullstory_skip_json_parse", False),
},
{
"name": "geo_lat_long",
"path": "$.location.lat_long",
"cast_as": "string",
"skip_parse": var("fullstory_skip_json_parse", False),
},
{
"name": "url_full_url",
"path": "$.url.full_url",
"cast_as": "string",
"skip_parse": var("fullstory_skip_json_parse", False),
},
{
"name": "url_host",
"path": "$.url.host",
"cast_as": "string"
"cast_as": "string",
"skip_parse": var("fullstory_skip_json_parse", False),
},
{
"name": "url_path",
"path": "$.url.path",
"cast_as": "string"
"cast_as": "string",
"skip_parse": var("fullstory_skip_json_parse", False),
},
{
"name": "url_query",
"path": "$.url.query",
"cast_as": "string"
"cast_as": "string",
"skip_parse": var("fullstory_skip_json_parse", False),
},
{
"name": "url_hash_path",
"path": "$.url.hash_path",
"cast_as": "string",
"skip_parse": var("fullstory_skip_json_parse", False),
},
{
"name": "url_hash_query",
"path": "$.url.hash_query",
"cast_as": "string",
"skip_parse": var("fullstory_skip_json_parse", False),
},
{
"name": "initial_referrer_full_url",
"path": "$.initial_referrer.full_url",
"cast_as": "string",
"skip_parse": var("fullstory_skip_json_parse", False),
},
{
"name": "initial_referrer_host",
"path": "$.initial_referrer.host",
"cast_as": "string",
"skip_parse": var("fullstory_skip_json_parse", False),
},
{
"name": "initial_referrer_path",
"path": "$.initial_referrer.path",
"cast_as": "string",
"skip_parse": var("fullstory_skip_json_parse", False),
},
{
"name": "initial_referrer_query",
"path": "$.initial_referrer.query",
"cast_as": "string",
"skip_parse": var("fullstory_skip_json_parse", False),
},
{
"name": "initial_referrer_hash_path",
"path": "$.initial_referrer.hash_path",
"cast_as": "string",
"skip_parse": var("fullstory_skip_json_parse", False),
},
{
"name": "initial_referrer_hash_query",
"path": "$.initial_referrer.hash_query",
"cast_as": "string",
"skip_parse": var("fullstory_skip_json_parse", False),
},
{
"name": "source_properties",
"path": "$",
"dtype": "object",
"skip_parse": var("fullstory_skip_json_parse", False),
},
],
)
Expand All @@ -138,29 +161,34 @@ select
{
"name": "target_text",
"path": "$.target.text",
"cast_as": "string"
"cast_as": "string",
"skip_parse": var("fullstory_skip_json_parse", False),
},
{
"name": "target_masked",
"path": "$.target.masked",
"cast_as": "boolean",
"prefix": "coalesce(",
"postfix": ", FALSE)",
"skip_parse": var("fullstory_skip_json_parse", False),
},
{
"name": "target_raw_selector",
"path": "$.target.raw_selector",
"cast_as": "string",
"skip_parse": var("fullstory_skip_json_parse", False),
},
{
"name": "element_definition_id",
"path": "$.target.element_definition_id",
"cast_as": "string",
"skip_parse": var("fullstory_skip_json_parse", False),
},
{
"name": "additional_element_definition_ids",
"path": "$.target.additional_element_definition_ids",
"array": true,
"skip_parse": var("fullstory_skip_json_parse", False),
},
],
)
Expand All @@ -172,27 +200,32 @@ select
{
"name": "user_id",
"path": "$.user_id",
"cast_as": "string"
"cast_as": "string",
"skip_parse": var("fullstory_skip_json_parse", False),
},
{
"name": "user_email",
"path": "$.user_email",
"cast_as": "string"
"cast_as": "string",
"skip_parse": var("fullstory_skip_json_parse", False),
},
{
"name": "user_display_name",
"path": "$.user_display_name",
"cast_as": "string",
"skip_parse": var("fullstory_skip_json_parse", False),
},
{
"name": "user_properties",
"path": "$.user_properties",
"dtype": "object",
"skip_parse": var("fullstory_skip_json_parse", False),
},
{
"name": "event_properties",
"path": "$",
"dtype": "object",
"skip_parse": var("fullstory_skip_json_parse", False),
},
],
)
Expand Down

0 comments on commit 88cfcce

Please sign in to comment.