Skip to content
This repository has been archived by the owner on Nov 2, 2022. It is now read-only.

Commit

Permalink
updated bqml to use clientId vs fullvisitorid and removed unused files
Browse files Browse the repository at this point in the history
  • Loading branch information
Justin Pao committed Jul 27, 2020
1 parent 433f89a commit 5ea5056
Show file tree
Hide file tree
Showing 6 changed files with 48 additions and 126 deletions.
26 changes: 0 additions & 26 deletions BQML/bqml.model.lkml

This file was deleted.

16 changes: 8 additions & 8 deletions BQML/predictions.view.lkml
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ view: future_purchase_model {
--, CLASS_WEIGHTS=[('1',1), ('0',0.05)] -- Consider adding class weights or downsampling if you have imbalanced classes
) AS
SELECT
* EXCEPT(fullVisitorId)
* EXCEPT(clientId)
FROM ${training_input.SQL_TABLE_NAME};;
}
}
Expand Down Expand Up @@ -82,7 +82,7 @@ view: roc_curve {
type: number
link: {
label: "Likely Customers to Purchase"
url: "/explore/bqml_ga_demo/ga_sessions?fields=ga_sessions.fullVisitorId,future_purchase_prediction.max_predicted_score&f[future_purchase_prediction.predicted_will_purchase_in_future]=%3E%3D{{value}}"
url: "/explore/bqml_ga_demo/ga_sessions?fields=ga_sessions.clientId,future_purchase_prediction.max_predicted_score&f[future_purchase_prediction.predicted_will_purchase_in_future]=%3E%3D{{value}}"
icon_url: "http://www.looker.com/favicon.ico"
}
}
Expand Down Expand Up @@ -153,9 +153,9 @@ view: future_input {
extends: [user_facts]
derived_table: {
sql: {% assign x = "${EXTENDED}" %}
{% assign updated_start_sql = x | replace: 'DAYS_BACK',"6" %}
{% assign updated_start_sql = x | replace: 'DAYS_BACK',"30" %}
/*updated_start_date*/
{% assign updated_sql = updated_start_sql | replace: 'DAYS_FROM',"7" %}
{% assign updated_sql = updated_start_sql | replace: 'DAYS_FROM',"31" %}
/*updated_end_date*/
{{updated_sql}}
;;
Expand All @@ -165,7 +165,7 @@ view: future_input {

view: future_purchase_prediction {
derived_table: {
sql: SELECT fullVisitorId,
sql: SELECT clientId,
pred.prob as user_propensity_score,
NTILE(10) OVER (ORDER BY pred.prob DESC) as user_propensity_decile
FROM ml.PREDICT(
Expand All @@ -177,10 +177,10 @@ view: future_purchase_prediction {
}
dimension: user_propensity_score {type: number}
dimension: user_propensity_decile {type: number}
dimension: fullVisitorId {
dimension: clientId {
type: string
hidden: no
sql: TRIM(REPLACE(${TABLE}.fullVisitorId,',','')) ;;
sql: TRIM(REPLACE(${TABLE}.clientId,',','')) ;;
}
measure: average_user_propensity_score {
type: average
Expand All @@ -190,4 +190,4 @@ view: future_purchase_prediction {
type: average
sql: ${user_propensity_decile} ;;
}
}
}
52 changes: 26 additions & 26 deletions BQML/user_facts.view.lkml
Original file line number Diff line number Diff line change
Expand Up @@ -7,31 +7,31 @@ view: user_facts {
WHERE TIMESTAMP(PARSE_DATE('%Y%m%d', REGEXP_EXTRACT(_TABLE_SUFFIX,r'\d\d\d\d\d\d\d\d'))) BETWEEN ((TIMESTAMP_ADD(TIMESTAMP_TRUNC( CURRENT_TIMESTAMP(), DAY), INTERVAL -DAYS_BACK DAY))) AND ((TIMESTAMP_ADD(TIMESTAMP_ADD(TIMESTAMP_TRUNC(CURRENT_TIMESTAMP(), DAY), INTERVAL -DAYS_BACK DAY), INTERVAL DAYS_FROM DAY)))),
-- labeling customers who have made a purchase as 1 and customers who have not made a purchase as 0
user_label AS (
SELECT fullvisitorId, max(case when totals.transactions >= 1 then 1 else 0 end) as label, max(case when totals.transactions >= 1 then visitStartTime end) as event_session
SELECT clientId, max(case when totals.transactions >= 1 then 1 else 0 end) as label, max(case when totals.transactions >= 1 then visitStartTime end) as event_session
FROM filtered_base
GROUP BY fullvisitorId),
GROUP BY clientId),
-- finding the most common hour of day for each user within the time period
unique_hour_of_day AS(
(SELECT ga_sessions_visit_start_hour_of_day, fullVisitorId FROM (SELECT ROW_NUMBER () OVER(PARTITION BY fullVisitorId ORDER BY pageviews) as row_number, fullVisitorId, ga_sessions_visit_start_hour_of_day
FROM (SELECT ga_sessions.fullVisitorId as fullvisitorid, EXTRACT(HOUR FROM TIMESTAMP_SECONDS(ga_sessions.visitStarttime)) AS ga_sessions_visit_start_hour_of_day, SUM(ga_sessions.totals.pageviews) as pageviews
FROM filtered_base AS ga_sessions LEFT JOIN user_label ON ga_sessions.fullvisitorid = user_label.fullvisitorid @{QUERY_FILTER} GROUP BY 1,2)) WHERE row_number = 1)),
(SELECT ga_sessions_visit_start_hour_of_day, clientId FROM (SELECT ROW_NUMBER () OVER(PARTITION BY clientId ORDER BY pageviews) as row_number, clientId, ga_sessions_visit_start_hour_of_day
FROM (SELECT ga_sessions.clientId as clientId, EXTRACT(HOUR FROM TIMESTAMP_SECONDS(ga_sessions.visitStarttime)) AS ga_sessions_visit_start_hour_of_day, SUM(ga_sessions.totals.pageviews) as pageviews
FROM filtered_base AS ga_sessions LEFT JOIN user_label ON ga_sessions.clientId = user_label.clientId @{QUERY_FILTER} GROUP BY 1,2)) WHERE row_number = 1)),
-- finding the most common metro for the user within the time period
unique_dma AS(
(SELECT metro, fullVisitorId FROM (SELECT ROW_NUMBER () OVER(PARTITION BY fullVisitorId ORDER BY pageviews) as row_number, fullVisitorId, metro
FROM (SELECT ga_sessions.fullVisitorId as fullvisitorid, ga_sessions.geoNetwork.metro as metro , SUM(ga_sessions.totals.pageviews) as pageviews
FROM filtered_base AS ga_sessions LEFT JOIN user_label ON ga_sessions.fullvisitorid = user_label.fullvisitorid @{QUERY_FILTER} GROUP BY 1,2)) WHERE row_number = 1)),
(SELECT metro, clientId FROM (SELECT ROW_NUMBER () OVER(PARTITION BY clientId ORDER BY pageviews) as row_number, clientId, metro
FROM (SELECT ga_sessions.clientId as clientId, ga_sessions.geoNetwork.metro as metro , SUM(ga_sessions.totals.pageviews) as pageviews
FROM filtered_base AS ga_sessions LEFT JOIN user_label ON ga_sessions.clientId = user_label.clientId @{QUERY_FILTER} GROUP BY 1,2)) WHERE row_number = 1)),
-- finding the most common day of week for the user within the time period
unique_day_of_week AS(
(SELECT ga_sessions_visit_start_day_of_week, fullVisitorId FROM (SELECT ROW_NUMBER () OVER(PARTITION BY fullVisitorId ORDER BY pageviews) as row_number, fullVisitorId, ga_sessions_visit_start_day_of_week
FROM (SELECT ga_sessions.fullVisitorId as fullvisitorid, FORMAT_TIMESTAMP('%A', TIMESTAMP_SECONDS(ga_sessions.visitStarttime)) AS ga_sessions_visit_start_day_of_week , SUM(ga_sessions.totals.pageviews) as pageviews
FROM filtered_base AS ga_sessions LEFT JOIN user_label ON ga_sessions.fullvisitorid = user_label.fullvisitorid @{QUERY_FILTER} GROUP BY 1,2)) WHERE row_number = 1)),
(SELECT ga_sessions_visit_start_day_of_week, clientId FROM (SELECT ROW_NUMBER () OVER(PARTITION BY clientId ORDER BY pageviews) as row_number, clientId, ga_sessions_visit_start_day_of_week
FROM (SELECT ga_sessions.clientId as clientId, FORMAT_TIMESTAMP('%A', TIMESTAMP_SECONDS(ga_sessions.visitStarttime)) AS ga_sessions_visit_start_day_of_week , SUM(ga_sessions.totals.pageviews) as pageviews
FROM filtered_base AS ga_sessions LEFT JOIN user_label ON ga_sessions.clientId = user_label.clientId @{QUERY_FILTER} GROUP BY 1,2)) WHERE row_number = 1)),
-- finding the most common traffic source for the user
unique_traffic_source AS(
(SELECT ga_sessions_source, fullVisitorId FROM (SELECT ROW_NUMBER () OVER(PARTITION BY fullVisitorId ORDER BY pageviews) as row_number, fullVisitorId, ga_sessions_source
FROM (SELECT ga_sessions.fullVisitorId as fullvisitorid, ga_sessions.trafficsource.medium AS ga_sessions_source, SUM(ga_sessions.totals.pageviews) as pageviews
FROM filtered_base AS ga_sessions LEFT JOIN user_label ON ga_sessions.fullvisitorid = user_label.fullvisitorid @{QUERY_FILTER} GROUP BY 1,2)) WHERE row_number = 1)),
(SELECT ga_sessions_source, clientId FROM (SELECT ROW_NUMBER () OVER(PARTITION BY clientId ORDER BY pageviews) as row_number, clientId, ga_sessions_source
FROM (SELECT ga_sessions.clientId as clientId, ga_sessions.trafficsource.medium AS ga_sessions_source, SUM(ga_sessions.totals.pageviews) as pageviews
FROM filtered_base AS ga_sessions LEFT JOIN user_label ON ga_sessions.clientId = user_label.clientId @{QUERY_FILTER} GROUP BY 1,2)) WHERE row_number = 1)),

agg_metrics AS ( SELECT ga_sessions.fullvisitorid, count(distinct visitId) as total_sessions,
agg_metrics AS ( SELECT ga_sessions.clientId, count(distinct visitId) as total_sessions,
sum(totals.pageviews) as pageviews,
count(totals.bounces)/count(distinct VisitID) as bounce_rate,
sum(totals.pageviews) / count(distinct VisitID) as avg_session_depth,
Expand All @@ -47,19 +47,19 @@ view: user_facts {
sum(case when trafficSource.medium = 'cpm' then 1 else 0 end) as visits_traffic_source_cpm,
sum(case when trafficSource.medium = 'affiliate' then 1 else 0 end) as visits_traffic_source_affiliate,
sum(case when trafficSource.medium = 'referral' then 1 else 0 end) as visits_traffic_source_referral
FROM filtered_base AS ga_sessions LEFT JOIN user_label ON ga_sessions.fullvisitorid = user_label.fullvisitorid
FROM filtered_base AS ga_sessions LEFT JOIN user_label ON ga_sessions.clientId = user_label.clientId
@{QUERY_FILTER} GROUP BY 1 )


SELECT user_label.fullvisitorid, label,ga_sessions_visit_start_hour_of_day, metro, ga_sessions_visit_start_day_of_week, ga_sessions_source,
SELECT user_label.clientId, label,ga_sessions_visit_start_hour_of_day, metro, ga_sessions_visit_start_day_of_week, ga_sessions_source,
total_sessions, pageviews, bounce_rate, avg_session_depth, visits_traffic_source_none, visits_traffic_source_organic, visits_traffic_source_cpc, visits_traffic_source_cpm, visits_traffic_source_affiliate,
visits_traffic_source_referral, distinct_dmas, mobile, chrome, safari, browser_other
FROM user_label
LEFT JOIN unique_hour_of_day ON user_label.fullvisitorid = unique_hour_of_day.fullvisitorid
LEFT JOIN unique_dma ON user_label.fullvisitorid = unique_dma.fullvisitorid
LEFT JOIN unique_day_of_week ON user_label.fullvisitorid = unique_day_of_week.fullvisitorid
LEFT JOIN unique_traffic_source ON unique_traffic_source.fullvisitorid = user_label.fullvisitorid
LEFT JOIN agg_metrics ON agg_metrics.fullvisitorid = user_label.fullvisitorid
LEFT JOIN unique_hour_of_day ON user_label.clientId = unique_hour_of_day.clientId
LEFT JOIN unique_dma ON user_label.clientId = unique_dma.clientId
LEFT JOIN unique_day_of_week ON user_label.clientId = unique_day_of_week.clientId
LEFT JOIN unique_traffic_source ON unique_traffic_source.clientId = user_label.clientId
LEFT JOIN agg_metrics ON agg_metrics.clientId = user_label.clientId
;;
persist_for: "24 hours"
}
Expand All @@ -84,9 +84,9 @@ view: user_facts {
# sql: TIMESTAMP(PARSE_DATE('%Y%m%d', REGEXP_EXTRACT(_TABLE_SUFFIX,r'\d\d\d\d\d\d\d\d'))) ;;
# }

dimension: fullvisitorid {
dimension: clientId {
type: string
sql: ${TABLE}.fullvisitorid ;;
sql: ${TABLE}.clientId ;;
}

dimension: label {
Expand Down Expand Up @@ -192,7 +192,7 @@ view: user_facts {

set: detail {
fields: [
fullvisitorid,
clientId,
label,
ga_sessions_visit_start_hour_of_day,
metro,
Expand All @@ -205,4 +205,4 @@ view: user_facts {
avg_session_depth
]
}
}
}
61 changes: 0 additions & 61 deletions BQML/user_label.view.lkml

This file was deleted.

6 changes: 6 additions & 0 deletions Google_Analytics/ga_sessions.view.lkml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ view: ga_sessions {
group_label: "ID"
description: "The unique visitor ID (also known as client ID)."
sql: ${TABLE}.fullVisitorId ;;
hidden: yes
}

dimension: user_id {
Expand All @@ -49,6 +50,11 @@ view: ga_sessions {
sql: ${TABLE}.visitId ;;
}

# Exposes the clientId column of the underlying sessions table as a string dimension.
# NOTE(review): presumably the Google Analytics client ID that the BQML views now key on
# in place of fullVisitorId -- confirm the column exists in every exported table.
dimension: client_id {
type: string
sql: ${TABLE}.clientId ;;
}

dimension: visitor_id {
hidden: yes
label: "User ID"
Expand Down
13 changes: 8 additions & 5 deletions marketing.model.lkml
Original file line number Diff line number Diff line change
Expand Up @@ -103,11 +103,14 @@ explore: ga_sessions {
# {% endif %};;
# }

# join: future_purchase_prediction {
# type: left_outer
# sql_on: ${future_purchase_prediction.fullVisitorId} = ${ga_sessions.full_visitor_id} ;;
# relationship: one_to_many
# }
}

# Explore over the future_input view, left-joined to the model's
# predictions in future_purchase_prediction by matching clientId on both sides.
explore: future_input {
join: future_purchase_prediction {
type: left_outer
sql_on: ${future_purchase_prediction.clientId} = ${future_input.clientId} ;;
# NOTE(review): both views appear to carry one row per clientId (user_facts groups
# by clientId; the prediction view selects clientId from ml.PREDICT) -- confirm
# whether one_to_many is intended here rather than one_to_one.
relationship: one_to_many
}
}


Expand Down

0 comments on commit 5ea5056

Please sign in to comment.