Skip to content

Commit

Permalink
chore(benefits): alphabetize CTE fields
Browse files Browse the repository at this point in the history
add missing fields to historical conversion CTE
  • Loading branch information
thekaveman committed Nov 4, 2024
1 parent 5306630 commit 3e72b3a
Showing 1 changed file with 162 additions and 173 deletions.
335 changes: 162 additions & 173 deletions warehouse/models/mart/benefits/fct_benefits_events.sql
Original file line number Diff line number Diff line change
@@ -1,215 +1,204 @@
{{ config(materialized='table') }}

WITH fct_benefits_events AS (
SELECT
-- Only fields that aren't _always_ empty (https://dashboards.calitp.org/question#eyJkYXRhc2V0X3F1ZXJ5Ijp7ImRhdGFiYXNlIjoyLCJxdWVyeSI6eyJzb3VyY2UtdGFibGUiOjM1ODR9LCJ0eXBlIjoicXVlcnkifSwiZGlzcGxheSI6InRhYmxlIiwidmlzdWFsaXphdGlvbl9zZXR0aW5ncyI6e319)
app,
device_id,
user_id,
client_event_time,
event_id,
session_id,
case
when event_type = "selected eligibility verifier"
then "selected enrollment flow"
when event_type = "started payment connection"
then "started card tokenization"
when event_type = "closed payment connection" or event_type = "ended card tokenization"
then "finished card tokenization"
else event_type
end as event_type,
-- Fix bug in Docker build process resulting in incorrect version strings
-- https://github.com/cal-itp/benefits/pull/2392
case
when version_name = "2024.7.3.dev0+gcd3b083.d20240731"
then "2024.7.2"
when version_name = "2024.8.2.dev0+g7664917.d20240821"
then "2024.8.1"
when version_name = "2024.9.2.dev0+gadf41b9.d20240909"
then "2024.9.1"
when version_name = "2024.9.3.dev0+gfeb06d2.d20240918"
then "2024.9.2"
when version_name = "2024.9.4.dev0+g861519e.d20240926"
then "2024.9.3"
when version_name = "2024.10.2.dev0+g158e1b0.d20241010"
then "2024.10.1"
else version_name
end as version_name,
os_name,
os_version,
device_family,
device_type,
country,
language,
library,
city,
region,
event_time,
client_upload_time,
server_upload_time,
server_received_time,
amplitude_id,
-- Fix bug in Docker build process resulting in incorrect version strings
-- https://github.com/cal-itp/benefits/pull/2392
case
when start_version = "2024.7.3.dev0+gcd3b083.d20240731"
then "2024.7.2"
when start_version = "2024.8.2.dev0+g7664917.d20240821"
then "2024.8.1"
when start_version = "2024.9.2.dev0+gadf41b9.d20240909"
then "2024.9.1"
when start_version = "2024.9.3.dev0+gfeb06d2.d20240918"
then "2024.9.2"
when start_version = "2024.9.4.dev0+g861519e.d20240926"
then "2024.9.3"
when start_version = "2024.10.2.dev0+g158e1b0.d20241010"
then "2024.10.1"
else start_version
end as start_version,
uuid,
processed_time,

-- Event Properties (https://app.amplitude.com/data/compiler/Benefits/properties/main/latest/event)
{{ json_extract_column('event_properties', 'card_tokenize_func') }},
{{ json_extract_column('event_properties', 'card_tokenize_url') }},
{{ json_extract_column('event_properties', 'eligibility_verifier') }},
{{ json_extract_column('event_properties', 'error.name') }},
{{ json_extract_column('event_properties', 'error.status') }},
{{ json_extract_column('event_properties', 'error.sub') }},
{{ json_extract_column('event_properties', 'href') }},
{{ json_extract_column('event_properties', 'language') }},
{{ json_extract_column('event_properties', 'origin') }},
{{ json_extract_column('event_properties', 'path') }},
{{ json_extract_column('event_properties', 'status') }},
{{ json_extract_column('event_properties', 'transit_agency') }},

-- New column `enrollment_method`, historical values should be set to "digital"
-- https://github.com/cal-itp/benefits/pull/2402
COALESCE(
{{ json_extract_column('event_properties', 'enrollment_method', no_alias = true) }},
"digital"
) AS event_properties_enrollment_method,

-- Historical data existed in `auth_provider` but new data is in `claims_provider`
-- https://github.com/cal-itp/benefits/pull/2401
COALESCE(
{{ json_extract_column('event_properties', 'claims_provider', no_alias = true) }},
{{ json_extract_column('event_properties', 'auth_provider', no_alias = true) }}
) AS event_properties_claims_provider,

-- Historical data existed in `eligibility_types` but new data is in `enrollment_flows`
-- https://github.com/cal-itp/benefits/pull/2379
COALESCE(
{{ json_extract_flattened_column('event_properties', 'enrollment_flows', no_alias = true) }},
{{ json_extract_flattened_column('event_properties', 'eligibility_types', no_alias = true) }}
) AS event_properties_enrollment_flows,

-- Historical data existed in `payment_group` but new data is in `enrollment_group`
-- https://github.com/cal-itp/benefits/pull/2391
COALESCE(
{{ json_extract_flattened_column('event_properties', 'enrollment_group', no_alias = true) }},
{{ json_extract_flattened_column('event_properties', 'payment_group', no_alias = true) }}
) AS event_properties_enrollment_group,

-- User Properties (https://app.amplitude.com/data/compiler/Benefits/properties/main/latest/user)
{{ json_extract_column('user_properties', 'eligibility_verifier') }},
{{ json_extract_column('user_properties', 'initial_referrer') }},
{{ json_extract_column('user_properties', 'initial_referring_domain') }},

-- Historical data existed in `provider_name` but new data is in `transit_agency`
-- https://github.com/cal-itp/benefits/pull/901
COALESCE(
{{ json_extract_column('user_properties', 'transit_agency', no_alias = true) }},
{{ json_extract_column('user_properties', 'provider_name', no_alias = true) }}
) AS user_properties_transit_agency,

{{ json_extract_column('user_properties', 'referrer') }},
{{ json_extract_column('user_properties', 'referring_domain') }},
{{ json_extract_column('user_properties', 'user_agent') }},

-- New column `enrollment_method`, historical values should be set to "digital"
-- https://github.com/cal-itp/benefits/pull/2402
COALESCE(
{{ json_extract_column('user_properties', 'enrollment_method', no_alias = true) }},
"digital"
) AS user_properties_enrollment_method,

-- Historical data existed in `eligibility_types` but new data is in `enrollment_flows`
-- https://github.com/cal-itp/benefits/pull/2379
COALESCE(
{{ json_extract_flattened_column('user_properties', 'enrollment_flows', no_alias = true) }},
{{ json_extract_flattened_column('user_properties', 'eligibility_types', no_alias = true) }}
) AS user_properties_enrollment_flows

FROM {{ ref('stg_amplitude__benefits_events') }}
),
fct_old_enrollments AS (
SELECT
-- Keep fields in alphabetical order
amplitude_id,
app,
device_id,
user_id,
city,
client_event_time,
event_id,
session_id,
"returned enrollment" as event_type,
version_name,
os_name,
os_version,
client_upload_time,
country,
device_family,
device_id,
device_type,
country,
event_id,
{{ json_extract_column('event_properties', 'card_tokenize_func') }},
{{ json_extract_column('event_properties', 'card_tokenize_url') }},
-- Historical data existed in `auth_provider` but new data is in `claims_provider`
-- https://github.com/cal-itp/benefits/pull/2401
COALESCE(
{{ json_extract_column('event_properties', 'claims_provider', no_alias = true) }},
{{ json_extract_column('event_properties', 'auth_provider', no_alias = true) }}
) AS event_properties_claims_provider,
{{ json_extract_column('event_properties', 'eligibility_verifier') }},
-- Historical data existed in `eligibility_types` but new data is in `enrollment_flows`
-- https://github.com/cal-itp/benefits/pull/2379
COALESCE(
{{ json_extract_flattened_column('event_properties', 'enrollment_flows', no_alias = true) }},
{{ json_extract_flattened_column('event_properties', 'eligibility_types', no_alias = true) }}
) AS event_properties_enrollment_flows,
-- Historical data existed in `payment_group` but new data is in `enrollment_group`
-- https://github.com/cal-itp/benefits/pull/2391
COALESCE(
{{ json_extract_flattened_column('event_properties', 'enrollment_group', no_alias = true) }},
{{ json_extract_flattened_column('event_properties', 'payment_group', no_alias = true) }}
) AS event_properties_enrollment_group,
-- New column `enrollment_method`, historical values should be set to "digital"
-- https://github.com/cal-itp/benefits/pull/2402
COALESCE(
{{ json_extract_column('event_properties', 'enrollment_method', no_alias = true) }},
"digital"
) AS event_properties_enrollment_method,
{{ json_extract_column('event_properties', 'error.name') }},
{{ json_extract_column('event_properties', 'error.status') }},
{{ json_extract_column('event_properties', 'error.sub') }},
{{ json_extract_column('event_properties', 'href') }},
{{ json_extract_column('event_properties', 'language') }},
{{ json_extract_column('event_properties', 'origin') }},
{{ json_extract_column('event_properties', 'path') }},
{{ json_extract_column('event_properties', 'status') }},
{{ json_extract_column('event_properties', 'transit_agency') }},
event_time,
CASE
WHEN event_type = "selected eligibility verifier"
THEN "selected enrollment flow"
WHEN event_type = "started payment connection"
THEN "started card tokenization"
WHEN event_type = "closed payment connection" or event_type = "ended card tokenization"
THEN "finished card tokenization"
ELSE event_type
END AS event_type,
language,
library,
city,
os_name,
os_version,
processed_time,
region,
event_time,
client_upload_time,
server_upload_time,
server_received_time,
amplitude_id,
start_version,
server_upload_time,
session_id,
-- Fix bug in Docker build process resulting in incorrect version strings
-- https://github.com/cal-itp/benefits/pull/2392
CASE
WHEN start_version = "2024.7.3.dev0+gcd3b083.d20240731"
THEN "2024.7.2"
WHEN start_version = "2024.8.2.dev0+g7664917.d20240821"
THEN "2024.8.1"
WHEN start_version = "2024.9.2.dev0+gadf41b9.d20240909"
THEN "2024.9.1"
WHEN start_version = "2024.9.3.dev0+gfeb06d2.d20240918"
THEN "2024.9.2"
WHEN start_version = "2024.9.4.dev0+g861519e.d20240926"
THEN "2024.9.3"
WHEN start_version = "2024.10.2.dev0+g158e1b0.d20241010"
THEN "2024.10.1"
ELSE start_version
END AS start_version,
user_id,
{{ json_extract_column('user_properties', 'eligibility_verifier') }},
-- Historical data existed in `eligibility_types` but new data is in `enrollment_flows`
-- https://github.com/cal-itp/benefits/pull/2379
COALESCE(
{{ json_extract_flattened_column('user_properties', 'enrollment_flows', no_alias = true) }},
{{ json_extract_flattened_column('user_properties', 'eligibility_types', no_alias = true) }}
) AS user_properties_enrollment_flows,
-- New column `enrollment_method`, historical values should be set to "digital"
-- https://github.com/cal-itp/benefits/pull/2402
COALESCE(
{{ json_extract_column('user_properties', 'enrollment_method', no_alias = true) }},
"digital"
) AS user_properties_enrollment_method,
{{ json_extract_column('user_properties', 'initial_referrer') }},
{{ json_extract_column('user_properties', 'initial_referring_domain') }},
{{ json_extract_column('user_properties', 'referrer') }},
{{ json_extract_column('user_properties', 'referring_domain') }},
-- Historical data existed in `provider_name` but new data is in `transit_agency`
-- https://github.com/cal-itp/benefits/pull/901
COALESCE(
{{ json_extract_column('user_properties', 'transit_agency', no_alias = true) }},
{{ json_extract_column('user_properties', 'provider_name', no_alias = true) }}
) AS user_properties_transit_agency,
{{ json_extract_column('user_properties', 'user_agent') }},
uuid,
processed_time,
"digital" as event_properties_enrollment_method,
-- Fix bug in Docker build process resulting in incorrect version strings
-- https://github.com/cal-itp/benefits/pull/2392
CASE
WHEN version_name = "2024.7.3.dev0+gcd3b083.d20240731"
THEN "2024.7.2"
WHEN version_name = "2024.8.2.dev0+g7664917.d20240821"
THEN "2024.8.1"
WHEN version_name = "2024.9.2.dev0+gadf41b9.d20240909"
THEN "2024.9.1"
WHEN version_name = "2024.9.3.dev0+gfeb06d2.d20240918"
THEN "2024.9.2"
WHEN version_name = "2024.9.4.dev0+g861519e.d20240926"
THEN "2024.9.3"
WHEN version_name = "2024.10.2.dev0+g158e1b0.d20241010"
THEN "2024.10.1"
ELSE version_name
END AS version_name

FROM {{ ref('stg_amplitude__benefits_events') }}
),
fct_old_enrollments AS (
SELECT
-- Keep fields in alphabetical order, match fields from fct_benefits_events CTE
amplitude_id,
app,
city,
client_event_time,
client_upload_time,
country,
device_family,
device_id,
device_type,
event_id,
event_properties_card_tokenize_func,
event_properties_card_tokenize_url,
CASE
WHEN client_event_time < '2022-08-12T07:00:00Z'
THEN "ca-dmv"
WHEN client_event_time >= '2022-08-12T07:00:00Z'
THEN "cdt-logingov"
END as event_properties_claims_provider,
event_properties_card_tokenize_func,
event_properties_card_tokenize_url,
END AS event_properties_claims_provider,
CASE
WHEN client_event_time < '2022-08-12T07:00:00Z'
THEN "ca-dmv"
WHEN client_event_time >= '2022-08-12T07:00:00Z'
THEN "cdt-logingov"
END as event_properties_eligibility_verifier,
END AS event_properties_eligibility_verifier,
"senior" AS event_properties_enrollment_flows,
"5170d37b-43d5-4049-899c-b4d850e14990" AS event_properties_enrollment_group,
event_properties_enrollment_method,
event_properties_error_name,
event_properties_error_status,
event_properties_error_sub,
event_properties_href,
event_properties_language,
event_properties_origin,
event_properties_path,
"5170d37b-43d5-4049-899c-b4d850e14990" as event_properties_enrollment_group,
"success" as event_properties_status,
"Monterey-Salinas Transit" as event_properties_transit_agency,
"senior" as event_properties_enrollment_flows,
"digital" as user_properties_enrollment_method,
"success" AS event_properties_status,
"Monterey-Salinas Transit" AS event_properties_transit_agency,
event_time,
"returned enrollment" AS event_type,
language,
library,
os_name,
os_version,
processed_time,
region,
server_received_time,
server_upload_time,
session_id,
start_version,
user_id,
CASE
WHEN client_event_time < '2022-08-12T07:00:00Z'
THEN "ca-dmv"
WHEN client_event_time >= '2022-08-12T07:00:00Z'
THEN "cdt-logingov"
END as user_properties_eligibility_verifier,
END AS user_properties_eligibility_verifier,
"senior" AS user_properties_enrollment_flows,
user_properties_enrollment_method,
user_properties_initial_referrer,
user_properties_initial_referring_domain,
"Monterey-Salinas Transit" as user_properties_transit_agency,
user_properties_user_agent,
user_properties_referrer,
user_properties_referring_domain,
"senior" as user_properties_enrollment_flows
"Monterey-Salinas Transit" AS user_properties_transit_agency,
user_properties_user_agent,
uuid,
version_name
FROM fct_benefits_events
WHERE client_event_time >= '2021-12-08T08:00:00Z'
and client_event_time < '2022-08-29T07:00:00Z'
Expand Down

0 comments on commit 3e72b3a

Please sign in to comment.