Skip to content

Commit 5a7502f

Browse files
author
Jonah Paten
authored
feat: add landing pages to analytics package (#4378) (#4384)
* feat: added landing page analytics to package, refactored (#4378) * chore: refactored util functions for sheets to a different file (#4378) * chore: bumped setup.py (#4378)
1 parent e92c50d commit 5a7502f

File tree

5 files changed

+380
-222
lines changed

5 files changed

+380
-222
lines changed
Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
import datetime as dt
2+
from .charts import get_data_df, get_df_over_time
3+
from .entities import ADDITIONAL_DATA_BEHAVIOR
4+
import numpy as np
5+
import pandas as pd
6+
7+
def get_data_df_from_fields(metrics, dimensions, **other_params):
    """
    Query the Analytics API for the given field definitions and return the result with aliased columns.

    :param metrics: field dicts for the metrics to request; each needs an "id" and an "alias" key
    :param dimensions: field dicts for the dimensions to request; each needs an "id" and an "alias" key
    :param other_params: keyword arguments forwarded unchanged to get_data_df, including service params
    :return: a copy of the get_data_df result after reset_index(), so it carries a fresh RangeIndex,
        with every column named after a requested field id renamed to that field's alias.
        Dimension columns are expected to hold strings and metric columns ints
        (as produced by get_data_df; not enforced here).
    """
    metric_ids = [field["id"] for field in metrics]
    dimension_ids = [field["id"] for field in dimensions]
    raw_df = get_data_df(metric_ids, dimension_ids, **other_params)

    # Move whatever index get_data_df produced back into ordinary columns before renaming
    flat_df = raw_df.reset_index()
    # Dimensions and metrics share a single id -> alias mapping
    alias_by_id = get_rename_dict(dimensions + metrics)
    return flat_df.rename(columns=alias_by_id).copy()
25+
26+
27+
def get_rename_dict(dimensions):
    """
    Build the id -> alias mapping used to rename DataFrame columns.

    :param dimensions: field dicts, each with an "id" and an "alias" key
        (metric dicts work equally well, despite the parameter name)
    :return: a dict mapping each field's id to its alias; if an id repeats, the last alias wins
    """
    return {field["id"]: field["alias"] for field in dimensions}
32+
33+
34+
def get_one_period_change_series(series_current, series_previous, start_current, end_current, start_previous, end_previous):
    """
    Compute the relative change between two periods, normalising for periods of different lengths.

    The previous period's values are scaled by (days in current period / days in previous period)
    before comparison, and the result is (current / scaled previous) - 1, i.e. a fraction, not a percentage.

    :param series_current: the series holding the current period's values
    :param series_previous: the series holding the prior period's values; must use the same index names
    :param start_current: first day of the current period, as an ISO date string ("YYYY-MM-DD")
    :param end_current: last day (inclusive) of the current period, as an ISO date string
    :param start_previous: first day of the prior period, as an ISO date string
    :param end_previous: last day (inclusive) of the prior period, as an ISO date string
    :return: a Series indexed by the union of both indexes. Labels missing from the current series count as 0;
        labels missing from the previous series, and positive-infinite ratios, come out as NaN
    """
    def inclusive_day_count(start, end):
        # Number of days in the period, counting both endpoints
        last_day = dt.datetime.fromisoformat(end)
        first_day = dt.datetime.fromisoformat(start)
        return float((last_day - first_day).days + 1)

    # Both serieses must be keyed the same way for the comparison to be meaningful
    assert series_current.index.names == series_previous.index.names
    shared_index = series_current.index.union(series_previous.index)

    days_current = inclusive_day_count(start_current, end_current)
    days_previous = inclusive_day_count(start_previous, end_previous)
    assert days_current != 0 and days_previous != 0

    # A label absent from the current period means nothing was recorded for it, so treat it as 0
    current_values = series_current.reindex(shared_index).fillna(0)
    # Rescale the prior period to the length of the current one; absent labels stay NaN
    previous_values = series_previous.reindex(shared_index) * days_current / days_previous

    ratio = current_values / previous_values
    return (ratio - 1).replace({np.inf: np.nan})
57+
58+
59+
def get_change_over_time_df(
    metrics, time_dimension, include_changes=True, additional_data_path=None, additional_data_behavior=None, strftime_format="%Y-%m", **other_params
):
    """
    Get a DataFrame with the values of the given metrics over time, optionally with period-over-period changes.

    :param metrics: the metrics to be displayed; each is a dict with "id", "alias" and "change_alias" keys
    :param time_dimension: the time dimension to be used, as a dict with "id" and "alias" keys
    :param include_changes: whether to include the change columns, defaults to True
    :param additional_data_path: the path to a JSON file with additional data to be combined with the API data, defaults to None
    :param additional_data_behavior: an ADDITIONAL_DATA_BEHAVIOR member saying how the additional data is combined;
        required when additional_data_path is given, defaults to None
    :param strftime_format: the format applied to the time values, defaults to "%Y-%m".
        None leaves the time values exactly as they come out of the combination step
        (NOTE(review): earlier documentation said None returns datetimes, but no conversion happens in that case - confirm intent)
    :param other_params: any other parameters to be passed to the get_df_over_time function, including service params
    :returns: a DataFrame ordered from the newest to the oldest period.
        Columns are the time dimension alias, the metric aliases, and (if include_changes) the change metric aliases,
        each holding the fractional change relative to the following (older) row
    :raises ValueError: if additional_data_path is given but additional_data_behavior is not
        ADDITIONAL_DATA_BEHAVIOR.ADD or ADDITIONAL_DATA_BEHAVIOR.REPLACE
    """
    # NOTE(review): DataFrame.rename with a positional mapper relabels index entries rather than naming the index;
    # the time column actually gets its name from reset_index(names=...) at the end - confirm this rename is needed
    df_api = get_df_over_time(
        [metric["alias"] for metric in metrics],
        [metric["id"] for metric in metrics],
        time_dimension["id"],
        sort_results=[time_dimension["id"]],
        # Append "01" to every index label (presumably YYYYMM -> YYYYMMDD so it parses as a date) and order newest first
        df_processor=(lambda df: df.set_index(df.index + "01").sort_index(ascending=False)),
        format_table=False,
        **other_params
    ).rename({time_dimension["id"]: time_dimension["alias"]})

    if additional_data_path is not None:
        assert additional_data_behavior is not None
        df_saved = pd.read_json(additional_data_path)
        if additional_data_behavior == ADDITIONAL_DATA_BEHAVIOR.ADD:
            # Element-wise sum; a period present on only one side counts the other side as 0.
            # The result is reversed to restore newest-first order
            df_combined = df_api.add(df_saved.astype(int), fill_value=0)[::-1]
        elif additional_data_behavior == ADDITIONAL_DATA_BEHAVIOR.REPLACE:
            # Saved rows come first, so keep="first" lets them win over API rows for the same period
            df_combined = pd.concat([df_saved, df_api], ignore_index=False)
            df_combined = df_combined.loc[~df_combined.index.duplicated(keep="first")].sort_index(ascending=False)
        else:
            # Previously an unrecognised behavior silently produced an empty DataFrame
            raise ValueError(
                f"Unsupported additional_data_behavior: {additional_data_behavior!r}; "
                "expected ADDITIONAL_DATA_BEHAVIOR.ADD or ADDITIONAL_DATA_BEHAVIOR.REPLACE"
            )
    else:
        df_combined = df_api

    if include_changes:
        # Rows are newest first, so periods=-1 compares each row with the next (older) one.
        # Infinite changes (growth from zero) are reported as NaN
        df_combined[
            [metric["change_alias"] for metric in metrics]
        ] = df_combined[
            [metric["alias"] for metric in metrics]
        ].pct_change(periods=-1).replace({np.inf: np.nan})

    if strftime_format is not None:
        df_combined.index = pd.to_datetime(df_combined.index).strftime(strftime_format)

    return df_combined.reset_index(names=time_dimension["alias"])
Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
# Metric names
2+
# The number of events that occur
3+
from enum import Enum
4+
5+
# Count of events that occurred
METRIC_EVENT_COUNT = dict(
    id="eventCount",
    alias="Event Count",
    change_alias="Event Count Change",
)
# Every user who triggered at least one event,
# including users who visited only briefly without interacting with the site
# See https://support.google.com/analytics/answer/12253918?hl=en
METRIC_TOTAL_USERS = dict(
    id="totalUsers",
    alias="Total Users",
    change_alias="Total Users Change",
)
# Active users, using GA4's own definition of "active"
# See https://support.google.com/analytics/answer/12253918?hl=en
METRIC_ACTIVE_USERS = dict(
    id="activeUsers",
    alias="Users",
    change_alias="Active Users Change",
)
# Count of page views
METRIC_PAGE_VIEWS = dict(
    id="screenPageViews",
    alias="Total Pageviews",
    change_alias="Total Pageviews Change",
)
# Count of sessions
METRIC_SESSIONS = dict(
    id="sessions",
    alias="Sessions",
    change_alias="Sessions Change",
)
# Total outbound link clicks. Derived from other metrics rather than requested from the API, hence no id
SYNTHETIC_METRIC_CLICKS = dict(
    id=None,
    alias="Total Clicks",
    change_alias="Total Clicks Change",
)

# Event names
# Builtin outbound link click event; the clicked URL is stored in DIMENSION_BUILTIN_URL
# Fires in some situations where the custom click event does not, but never exposes url fragments in any dimension
EVENT_BUILTIN_CLICK = "click"
# Custom outbound link click event; the clicked URL is stored in DIMENSION_CUSTOM_URL
# Keeps url fragments; its count can differ slightly from the builtin click event
EVENT_CUSTOM_CLICK = "outbound_link_clicked"
# Builtin page view event
EVENT_PAGE_VIEW = "page_view"

# Dimensions
# Path of the page the user was on when the event occurred, without fragments or parameters
DIMENSION_PAGE_PATH = dict(
    id="pagePath",
    alias="Page Path",
)
# URL of the clicked link; only populated for EVENT_BUILTIN_CLICK. URL fragments are not included
DIMENSION_BUILTIN_URL = dict(
    id="linkUrl",
    alias="URL",
)
# Name of the event. See the GA4 docs for the possible event names
DIMENSION_EVENT_NAME = dict(
    id="eventName",
    alias="Event Name",
)
# URL of the clicked link; only populated for EVENT_CUSTOM_CLICK. URL fragments are included
DIMENSION_CUSTOM_URL = dict(
    id="customEvent:click_url",
    alias="Outbound URL",
)
# Landing page of a session
DIMENSION_LANDING_PAGE = dict(
    id="landingPage",
    alias="Landing Page",
)
# Month of the event, formatted YYYYMM
DIMENSION_YEAR_MONTH = dict(
    id="yearMonth",
    alias="Month",
)
# Hostname of the clicked link. Derived from DIMENSION_CUSTOM_URL and DIMENSION_BUILTIN_URL, hence no id
SYNTHETIC_DIMENSION_CLICKED_HOSTNAME = dict(
    id=None,
    alias="Clicked Hostname",
)
# Full clicked link with hostname, parameters, fragments, and prefix.
# Derived from DIMENSION_CUSTOM_URL and DIMENSION_BUILTIN_URL, hence no id
SYNTHETIC_DIMENSION_CLICKED_LINK = dict(
    id=None,
    alias="Outbound Link",
)
96+
97+
# Used as arguments in get_change_over_time_df
class ADDITIONAL_DATA_BEHAVIOR(Enum):
    """How cached (saved) data is combined with data fetched from the API."""
    # Sum the cached data with the api data
    ADD = "add"
    # Replace the api data with the cached data wherever both cover the same period
    REPLACE = "replace"

analytics/analytics_package/analytics/fields.py

Lines changed: 0 additions & 27 deletions
This file was deleted.

0 commit comments

Comments
 (0)