Skip to content
This repository has been archived by the owner on Nov 2, 2022. It is now read-only.

Commit

Permalink
updated page funnel and bqml model
Browse files Browse the repository at this point in the history
  • Loading branch information
Justin Pao committed Jul 17, 2020
1 parent 1419787 commit 21dabf8
Show file tree
Hide file tree
Showing 12 changed files with 446 additions and 905 deletions.
10 changes: 5 additions & 5 deletions BQML/predictions.view.lkml
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@ view: training_input {
derived_table: {
sql:
{% assign x = "${EXTENDED}" %}
{% assign updated_start_sql = x | replace: 'START_DATE',"'2018-01-01 12:00:00'" %}
{% assign updated_start_sql = x | replace: 'START_DATE',"'2017-02-01 12:00:00'" %}
/*updated_start_date*/
{% assign updated_sql = updated_start_sql | replace: 'END_DATE',"'2018-03-01 14:00:00'" %}
{% assign updated_sql = updated_start_sql | replace: 'END_DATE',"'2018-07-01 14:00:00'" %}
/*updated_end_date*/
{{updated_sql}}
;;
Expand All @@ -18,9 +18,9 @@ view: testing_input {
extends: [user_facts]
derived_table: {
sql: {% assign x = "${EXTENDED}" %}
{% assign updated_start_sql = x | replace: 'START_DATE',"'2018-03-01 12:00:00'" %}
{% assign updated_start_sql = x | replace: 'START_DATE',"'2017-02-01 12:00:00'" %}
/*updated_start_date*/
{% assign updated_sql = updated_start_sql | replace: 'END_DATE',"'2018-03-01 14:00:00'" %}
{% assign updated_sql = updated_start_sql | replace: 'END_DATE',"'2018-07-01 14:00:00'" %}
/*updated_end_date*/
{{updated_sql}}
;;
Expand Down Expand Up @@ -176,7 +176,7 @@ view: future_purchase_prediction {
}
dimension: user_propensity_score {type: number}
dimension: user_propensity_decile {type: number}
dimension: fullVisitorId {type: number hidden: yes}
dimension: fullVisitorId {type: number hidden: no}
measure: average_user_propensity_score {
type: average
sql: ${user_propensity_score} ;;
Expand Down
13 changes: 6 additions & 7 deletions BQML/user_facts.view.lkml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
view: user_facts {
derived_table: {
sql: WITH filtered_base AS (
SELECT * FROM `ComcastGA360.@{GA360_TABLE_NAME}`
SELECT * FROM `@{SCHEMA_NAME}.@{GA360_TABLE_NAME}`
WHERE TIMESTAMP(PARSE_DATE('%Y%m%d', REGEXP_EXTRACT(_TABLE_SUFFIX,r'\d\d\d\d\d\d\d\d'))) BETWEEN START_DATE AND END_DATE),

user_label AS (
Expand All @@ -12,23 +12,23 @@ view: user_facts {
unique_hour_of_day AS(
(SELECT ga_sessions_visit_start_hour_of_day, fullVisitorId FROM (SELECT ROW_NUMBER () OVER(PARTITION BY fullVisitorId ORDER BY pageviews) as row_number, fullVisitorId, ga_sessions_visit_start_hour_of_day
FROM (SELECT ga_sessions.fullVisitorId as fullvisitorid, EXTRACT(HOUR FROM TIMESTAMP_SECONDS(ga_sessions.visitStarttime)) AS ga_sessions_visit_start_hour_of_day, SUM(ga_sessions.totals.pageviews) as pageviews
FROM filtered_base AS ga_sessions LEFT JOIN user_label ON ga_sessions.fullvisitorid = user_label.fullvisitorid @{QUERY_FILTER_2} GROUP BY 1,2)) WHERE row_number = 1)),
FROM filtered_base AS ga_sessions LEFT JOIN user_label ON ga_sessions.fullvisitorid = user_label.fullvisitorid @{QUERY_FILTER} GROUP BY 1,2)) WHERE row_number = 1)),


unique_dma AS(
(SELECT metro, fullVisitorId FROM (SELECT ROW_NUMBER () OVER(PARTITION BY fullVisitorId ORDER BY pageviews) as row_number, fullVisitorId, metro
FROM (SELECT ga_sessions.fullVisitorId as fullvisitorid, ga_sessions.geoNetwork.metro as metro , SUM(ga_sessions.totals.pageviews) as pageviews
FROM filtered_base AS ga_sessions LEFT JOIN user_label ON ga_sessions.fullvisitorid = user_label.fullvisitorid @{QUERY_FILTER_2} GROUP BY 1,2)) WHERE row_number = 1)),
FROM filtered_base AS ga_sessions LEFT JOIN user_label ON ga_sessions.fullvisitorid = user_label.fullvisitorid @{QUERY_FILTER} GROUP BY 1,2)) WHERE row_number = 1)),

unique_day_of_week AS(
(SELECT ga_sessions_visit_start_day_of_week, fullVisitorId FROM (SELECT ROW_NUMBER () OVER(PARTITION BY fullVisitorId ORDER BY pageviews) as row_number, fullVisitorId, ga_sessions_visit_start_day_of_week
FROM (SELECT ga_sessions.fullVisitorId as fullvisitorid, FORMAT_TIMESTAMP('%A', TIMESTAMP_SECONDS(ga_sessions.visitStarttime)) AS ga_sessions_visit_start_day_of_week , SUM(ga_sessions.totals.pageviews) as pageviews
FROM filtered_base AS ga_sessions LEFT JOIN user_label ON ga_sessions.fullvisitorid = user_label.fullvisitorid @{QUERY_FILTER_2} GROUP BY 1,2)) WHERE row_number = 1)),
FROM filtered_base AS ga_sessions LEFT JOIN user_label ON ga_sessions.fullvisitorid = user_label.fullvisitorid @{QUERY_FILTER} GROUP BY 1,2)) WHERE row_number = 1)),

unique_traffic_source AS(
(SELECT ga_sessions_source, fullVisitorId FROM (SELECT ROW_NUMBER () OVER(PARTITION BY fullVisitorId ORDER BY pageviews) as row_number, fullVisitorId, ga_sessions_source
FROM (SELECT ga_sessions.fullVisitorId as fullvisitorid, ga_sessions.trafficsource.medium AS ga_sessions_source, SUM(ga_sessions.totals.pageviews) as pageviews
FROM filtered_base AS ga_sessions LEFT JOIN user_label ON ga_sessions.fullvisitorid = user_label.fullvisitorid @{QUERY_FILTER_2} GROUP BY 1,2)) WHERE row_number = 1)),
FROM filtered_base AS ga_sessions LEFT JOIN user_label ON ga_sessions.fullvisitorid = user_label.fullvisitorid @{QUERY_FILTER} GROUP BY 1,2)) WHERE row_number = 1)),

agg_metrics AS ( SELECT ga_sessions.fullvisitorid, count(distinct visitId) as total_sessions,
sum(totals.pageviews) as pageviews,
Expand All @@ -47,7 +47,7 @@ view: user_facts {
sum(case when trafficSource.medium = 'affiliate' then 1 else 0 end) as visits_traffic_source_affiliate,
sum(case when trafficSource.medium = 'referral' then 1 else 0 end) as visits_traffic_source_referral
FROM filtered_base AS ga_sessions LEFT JOIN user_label ON ga_sessions.fullvisitorid = user_label.fullvisitorid
@{QUERY_FILTER_2} GROUP BY 1 )
@{QUERY_FILTER} GROUP BY 1 )


SELECT user_label.fullvisitorid, label,ga_sessions_visit_start_hour_of_day, metro, ga_sessions_visit_start_day_of_week, ga_sessions_source,
Expand All @@ -57,7 +57,6 @@ view: user_facts {
LEFT JOIN unique_hour_of_day ON user_label.fullvisitorid = unique_hour_of_day.fullvisitorid
LEFT JOIN unique_dma ON user_label.fullvisitorid = unique_dma.fullvisitorid
LEFT JOIN unique_day_of_week ON user_label.fullvisitorid = unique_day_of_week.fullvisitorid
LEFT JOIN unique_browser ON user_label.fullvisitorid = unique_browser.fullvisitorid
LEFT JOIN unique_traffic_source ON unique_traffic_source.fullvisitorid = user_label.fullvisitorid
LEFT JOIN agg_metrics ON agg_metrics.fullvisitorid = user_label.fullvisitorid
;;
Expand Down
19 changes: 11 additions & 8 deletions BQML/user_label.view.lkml
Original file line number Diff line number Diff line change
Expand Up @@ -2,25 +2,28 @@ view: user_label {
derived_table: {
sql: SELECT fullvisitorId, max(case when totals.transactions = 1 then 1 else 0 end) as label, min(case when totals.transactions = 1 then visitStartTime end) as event_session
FROM `@{SCHEMA_NAME}.@{GA360_TABLE_NAME}`
WHERE TIMESTAMP(PARSE_DATE('%Y%m%d', REGEXP_EXTRACT(_TABLE_SUFFIX,r'\d\d\d\d\d\d\d\d'))) BETWEEN {% date_start date_range_filter %} AND {% date_end date_range_filter %}
GROUP BY fullvisitorId
;;
# sql_trigger_value: SELECT CURRENT_DATE ;;
sql_trigger_value: SELECT CURRENT_DATE ;;

}



# Date-range filter read by the derived table's WHERE clause through
# {% date_start date_range_filter %} / {% date_end date_range_filter %}.
# The default range is kept on a single line so that no line break ends up
# inside the filter expression.
filter: date_range_filter {
  type: date
  default_value: "2017-02-01 12:00:00 to 2018-07-01 14:00:00"
}

# Primary key of the view; the derived table groups by fullvisitorId, so
# there is one row per visitor.
dimension: fullvisitorId {
  primary_key: yes
  sql: ${TABLE}.fullvisitorId ;;
}
dimension: label {}
# Raw 0/1 label column produced by the derived table (1 when any session of
# the visitor has totals.transactions = 1). Hidden from the field picker.
dimension: label {
  hidden: yes
  type: number
  sql: ${TABLE}.label ;;
}

# Yes/no flag that is true when this view's label column equals 1.
# The column is qualified with ${TABLE} so the reference stays unambiguous
# when this view is joined to another view that also exposes a `label`
# column (the user_facts derived table selects one as well).
dimension: made_purchase {
  type: yesno
  sql: ${TABLE}.label = 1 ;;
}

dimension: event_session_seconds {
type: number
Expand Down
28 changes: 27 additions & 1 deletion Google_Analytics/event_actions.view.lkml
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# Explore over the event_actions view; flagged hidden.
explore: event_actions { hidden: yes }
view: event_actions {
derived_table: {
sql: SELECT
Expand Down Expand Up @@ -27,4 +30,27 @@ view: event_actions {
}
}

explore: event_actions {}
# Explore over the top_pages view; flagged hidden.
explore: top_pages { hidden: yes }

# The 20 page paths (query string stripped at the first '?') with the highest
# number of distinct PAGE hits. A hit is identified by the key
# fullVisitorId|visitId|table-suffix date|zero-padded hitNumber.
# The result is persisted for 24 hours.
view: top_pages {
  derived_table: {
    persist_for: "24 hours"
    sql:
      SELECT
        SPLIT(hits.page.pagePath, '?')[OFFSET(0)] AS page_path,
        COUNT(DISTINCT
          CASE
            WHEN hits.type = 'PAGE' THEN CONCAT(
              CONCAT(
                CAST(ga_sessions.fullVisitorId AS STRING),
                '|',
                COALESCE(CAST(ga_sessions.visitId AS STRING), ''),
                '|',
                CAST(PARSE_DATE('%Y%m%d', REGEXP_EXTRACT(_TABLE_SUFFIX, r'^\d\d\d\d\d\d\d\d')) AS STRING)
              ),
              '|',
              FORMAT('%05d', hits.hitNumber)
            )
          END
        ) AS hits_page_count
      FROM `@{SCHEMA_NAME}.@{GA360_TABLE_NAME}` AS ga_sessions
      LEFT JOIN UNNEST(ga_sessions.hits) AS hits
      GROUP BY page_path
      ORDER BY hits_page_count DESC
      LIMIT 20
    ;;
  }

  dimension: page_path {}
}
Loading

0 comments on commit 21dabf8

Please sign in to comment.