Skip to content

Commit 6bfb92a

Browse files
author
Jonah Paten
authored
feat: bulk export analytics (#4386) (#4395)
* feat: bulk export analytics (#4386) * chore: bumped setup.py, resolved errors from rebase (#4386)
1 parent be8484e commit 6bfb92a

File tree

5 files changed

+127
-19
lines changed

5 files changed

+127
-19
lines changed

analytics/analytics_package/analytics/_sheets_utils.py

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,4 +104,30 @@ def get_change_over_time_df(
104104
if strftime_format is not None:
105105
df_combined.index = pd.to_datetime(df_combined.index).strftime(strftime_format)
106106

107-
return df_combined.reset_index(names=time_dimension["alias"])
107+
return df_combined.reset_index(names=time_dimension["alias"])
108+
109+
def get_change_over_time_df_multiple_events(metric, events, time_dimension, **change_over_time_args):
    """
    Get a DataFrame with the change over time of a single metric, computed separately for each event.

    get_change_over_time_df is called once per event with a dimension filter on that event's id.
    The metric and change columns of each result are renamed to the event's aliases, and the
    per-event results are then joined side by side on the time dimension.

    :param metric: the metric to be displayed, as a dict with "alias" and "change_alias" keys
    :param events: the events to be used, as dicts with "id" and "alias" keys and an optional "change_alias" key.
        When "change_alias" is missing, the alias followed by " Change" is used
    :param time_dimension: the time dimension to be used, as a dict with an "alias" key
    :param change_over_time_args: any other parameters to be passed to the get_change_over_time_df function,
        including service params. Must not include "dimension_filter", which is set per event
    :returns: a DataFrame indexed by the time dimension alias,
        with one column per event alias and one column per event change alias
    """
    assert "dimension_filter" not in change_over_time_args, \
        "dimension_filter is set per event and cannot be passed in change_over_time_args"
    index_name = time_dimension["alias"]
    event_dfs = []
    for event in events:
        # Not every event entity defines a change alias, so fall back to the naming pattern the others use
        change_alias = event.get("change_alias", f"{event['alias']} Change")
        event_df = get_change_over_time_df(
            [metric],
            time_dimension,
            **change_over_time_args,
            dimension_filter=f"eventName=={event['id']}",
        )
        event_dfs.append(
            event_df.rename(
                columns={metric["alias"]: event["alias"], metric["change_alias"]: change_alias}
            ).set_index(index_name)
        )
    # Indexing every frame by the time dimension lets concat align the events row by row
    return pd.concat(event_dfs, axis=1)

analytics/analytics_package/analytics/entities.py

Lines changed: 68 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -45,17 +45,77 @@
4545
# Event Names
4646
# The builtin outbound link click event. Stores the clicked URL in DIMENSION_BUILTIN_URL
4747
# Triggers under some circumstances where custom click does not, but does not include url fragments in any dimensions
48-
EVENT_BUILTIN_CLICK = "click"
48+
EVENT_BUILTIN_CLICK = {
    "id": "click",
    "alias": "Builtin Outbound Link Click",
    # Name for this event's change column, following the "<alias> Change" pattern of the other events
    "change_alias": "Builtin Outbound Link Click Change",
}
4952
# The custom outbound link click event. Stores the clicked URL in DIMENSION_CUSTOM_URL
5053
# Includes url fragments, sometimes has a slightly different count to the built in click event
51-
EVENT_CUSTOM_CLICK = "outbound_link_clicked"
54+
EVENT_CUSTOM_CLICK = {
    "id": "outbound_link_clicked",
    "alias": "Custom Outbound Link Click",
    # Name for this event's change column, following the "<alias> Change" pattern of the other events
    "change_alias": "Custom Outbound Link Click Change",
}
5258
# The builtin page view event.
53-
EVENT_PAGE_VIEW = "page_view"
54-
EVENT_FILE_DOWNLOADED = "file_downloaded"
55-
EVENT_ENTITY_SELECTED = "entity_selected"
56-
EVENT_ENTITY_TABLE_SORTED = "entity_table_sorted"
57-
EVENT_ENTITY_TABLE_PAGINATED = "entity_table_paginated"
58-
EVENT_FILTER_SELECTED = "filter_selected"
59+
EVENT_PAGE_VIEW = {
    "id": "page_view",
    "alias": "Page View",
    # Name for this event's change column, following the "<alias> Change" pattern of the other events
    "change_alias": "Page View Change",
}
63+
64+
def _event_with_change(event_id, alias):
    """
    Build an event entity whose change alias is its alias followed by " Change".

    :param event_id: the event name, stored under "id"
    :param alias: the display name of the event, stored under "alias"
    :returns: a dict with "id", "alias", and "change_alias" keys, in that order
    """
    return {"id": event_id, "alias": alias, "change_alias": f"{alias} Change"}


# Export-related events
EVENT_INDEX_BULK_DOWNLOAD_SELECTED = _event_with_change("index_bulk_download_selected", "Bulk Download Selected")
# NOTE(review): unlike the other EVENT_INDEX_* ids, this id has no "index_" prefix -- confirm against the tracked event name
EVENT_INDEX_BULK_DOWNLOAD_REQUESTED = _event_with_change("bulk_download_requested", "Bulk Download Requested")
EVENT_INDEX_FILE_MANIFEST_SELECTED = _event_with_change("index_file_manifest_selected", "File Manifest Selected")
EVENT_INDEX_FILE_MANIFEST_REQUESTED = _event_with_change("index_file_manifest_requested", "File Manifest Requested")
EVENT_INDEX_ANALYZE_IN_TERRA_SELECTED = _event_with_change("index_analyze_in_terra_selected", "Analyze in Terra Selected")
EVENT_INDEX_ANALYZE_IN_TERRA_REQUESTED = _event_with_change("index_analyze_in_terra_requested", "Analyze in Terra Requested")

# File download and entity table events
EVENT_FILE_DOWNLOADED = _event_with_change("file_downloaded", "File Downloaded")
EVENT_ENTITY_SELECTED = _event_with_change("entity_selected", "Entity Selected")
EVENT_ENTITY_TABLE_SORTED = _event_with_change("entity_table_sorted", "Entity Table Sorted")
EVENT_ENTITY_TABLE_PAGINATED = _event_with_change("entity_table_paginated", "Entity Table Paginated")
EVENT_FILTER_SELECTED = _event_with_change("filter_selected", "Filter Selected")
59119
# DIMENSIONS
60120
# The path to the page the user is on when the event occurs. Does not include fragments or parameters
61121
DIMENSION_PAGE_PATH = {

analytics/analytics_package/analytics/sheets_api.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -226,7 +226,7 @@ def fill_worksheet_with_df(
226226
# Format worksheet
227227
# Justify Column Widths
228228
if "column_widths" not in sheet_formatting_options_filled or sheet_formatting_options_filled["column_widths"]["justify"]:
229-
text_widths = df.astype(str).columns.map(
229+
text_widths = df.columns.map(
230230
lambda column_name: df[column_name].astype(str).str.len().max()
231231
)
232232
header_widths = df.columns.str.len()

analytics/analytics_package/analytics/sheets_elements.py

Lines changed: 30 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ def get_outbound_links_df(analytics_params, ignore_index=True):
2121
df_builtin_links =get_data_df_from_fields(
2222
[METRIC_EVENT_COUNT, METRIC_TOTAL_USERS],
2323
[DIMENSION_PAGE_PATH, DIMENSION_BUILTIN_URL, DIMENSION_EVENT_NAME],
24-
dimension_filter=f"eventName=={EVENT_BUILTIN_CLICK}",
24+
dimension_filter=f"eventName=={EVENT_BUILTIN_CLICK['id']}",
2525
**analytics_params,
2626
).groupby(
2727
[DIMENSION_PAGE_PATH["alias"], DIMENSION_BUILTIN_URL["alias"]]
@@ -30,7 +30,7 @@ def get_outbound_links_df(analytics_params, ignore_index=True):
3030
df_custom_links = get_data_df_from_fields(
3131
[METRIC_EVENT_COUNT, METRIC_TOTAL_USERS],
3232
[DIMENSION_EVENT_NAME, DIMENSION_CUSTOM_URL, DIMENSION_PAGE_PATH],
33-
dimension_filter=f"eventName=={EVENT_CUSTOM_CLICK}",
33+
dimension_filter=f"eventName=={EVENT_CUSTOM_CLICK['id']}",
3434
**analytics_params,
3535
).groupby(
3636
[DIMENSION_PAGE_PATH["alias"], DIMENSION_CUSTOM_URL["alias"]]
@@ -123,7 +123,7 @@ def get_page_views_df(analytics_params, ignore_index=False):
123123
[METRIC_EVENT_COUNT, METRIC_TOTAL_USERS, METRIC_PAGE_VIEWS],
124124
[DIMENSION_PAGE_PATH, DIMENSION_EVENT_NAME],
125125
**analytics_params,
126-
dimension_filter=f"eventName=={EVENT_PAGE_VIEW}",
126+
dimension_filter=f"eventName=={EVENT_PAGE_VIEW['id']}",
127127
)[[DIMENSION_PAGE_PATH["alias"], METRIC_PAGE_VIEWS["alias"], METRIC_TOTAL_USERS["alias"]]].copy()
128128
if not ignore_index:
129129
df_response = df_response.set_index(DIMENSION_PAGE_PATH["alias"])
@@ -296,7 +296,7 @@ def get_index_table_download_df(analytics_params, ignore_index=True):
296296
[METRIC_EVENT_COUNT, METRIC_TOTAL_USERS],
297297
[DIMENSION_ENTITY_NAME, DIMENSION_RELATED_ENTITY_ID, DIMENSION_RELATED_ENTITY_NAME],
298298
**analytics_params,
299-
dimension_filter=f"eventName=={EVENT_FILE_DOWNLOADED}",
299+
dimension_filter=f"eventName=={EVENT_FILE_DOWNLOADED['id']}",
300300
)
301301
if not ignore_index:
302302
df_response = df_response.set_index([DIMENSION_ENTITY_NAME["alias"], DIMENSION_RELATED_ENTITY_ID["alias"], DIMENSION_RELATED_ENTITY_NAME["alias"]])
@@ -343,7 +343,7 @@ def get_index_entity_selected_df(analytics_params, ignore_index=True):
343343
[METRIC_EVENT_COUNT, METRIC_TOTAL_USERS],
344344
[DIMENSION_ENTITY_NAME_TAB],
345345
**analytics_params,
346-
dimension_filter=f"eventName=={EVENT_ENTITY_SELECTED}",
346+
dimension_filter=f"eventName=={EVENT_ENTITY_SELECTED['id']}",
347347
)
348348
if not ignore_index:
349349
df_response = df_response.set_index([DIMENSION_ENTITY_NAME_TAB["alias"]])
@@ -390,7 +390,7 @@ def get_index_entity_table_sorted_df(analytics_params, ignore_index=True):
390390
[METRIC_EVENT_COUNT, METRIC_TOTAL_USERS],
391391
[DIMENSION_ENTITY_NAME_TAB, DIMENSION_COLUMN_NAME, DIMENSION_SORT_DIRECTION],
392392
**analytics_params,
393-
dimension_filter=f"eventName=={EVENT_ENTITY_TABLE_SORTED}",
393+
dimension_filter=f"eventName=={EVENT_ENTITY_TABLE_SORTED['id']}",
394394
)
395395
if not ignore_index:
396396
df_response = df_response.set_index([DIMENSION_ENTITY_NAME_TAB["alias"], DIMENSION_COLUMN_NAME["alias"], DIMENSION_SORT_DIRECTION["alias"]])
@@ -438,7 +438,7 @@ def get_index_entity_table_paginated_df(analytics_params, ignore_index=True):
438438
[METRIC_EVENT_COUNT, METRIC_TOTAL_USERS],
439439
[DIMENSION_ENTITY_NAME_TAB, DIMENSION_PAGINATION_DIRECTION],
440440
**analytics_params,
441-
dimension_filter=f"eventName=={EVENT_ENTITY_TABLE_PAGINATED}",
441+
dimension_filter=f"eventName=={EVENT_ENTITY_TABLE_PAGINATED['id']}",
442442
)
443443
if not ignore_index:
444444
df_response = df_response.set_index([DIMENSION_ENTITY_NAME_TAB["alias"], DIMENSION_PAGINATION_DIRECTION["alias"]])
@@ -487,7 +487,7 @@ def get_index_filter_selected_df(analytics_params, ignore_index=True):
487487
[METRIC_EVENT_COUNT, METRIC_TOTAL_USERS],
488488
[DIMENSION_FILTER_NAME, DIMENSION_FILTER_VALUE],
489489
**analytics_params,
490-
dimension_filter=f"eventName=={EVENT_FILTER_SELECTED}",
490+
dimension_filter=f"eventName=={EVENT_FILTER_SELECTED['id']}",
491491
)
492492
if not ignore_index:
493493
df_response = df_response.set_index([DIMENSION_FILTER_NAME["alias"], DIMENSION_FILTER_VALUE["alias"]])
@@ -519,3 +519,25 @@ def get_index_filter_selected_change(analytics_params, start_current, end_curren
519519
sort_results=[METRIC_EVENT_COUNT, METRIC_TOTAL_USERS]
520520
)
521521

522+
523+
def get_event_count_over_time_df(analytics_params, events, additional_data_path=None, additional_data_behavior=None):
    """
    Get a DataFrame with the event count over time, and its change, for each of the given events.

    :param analytics_params: the parameters for the Analytics API, including service params, start dates, and end dates
    :param events: the events to be counted, as event dicts accepted by get_change_over_time_df_multiple_events
    :param additional_data_path: the path to a JSON file with additional data to be added to the DataFrame, defaults to None
    :param additional_data_behavior: the behavior to use when adding the additional data, as an instance of ADDITIONAL_DATA_BEHAVIOR, defaults to None
    :return: the DataFrame returned by get_change_over_time_df_multiple_events
        Dimensions: DIMENSION_YEAR_MONTH
        Metrics: METRIC_EVENT_COUNT, reported once per event
    """
    return get_change_over_time_df_multiple_events(
        METRIC_EVENT_COUNT,
        events,
        DIMENSION_YEAR_MONTH,
        additional_data_path=additional_data_path,
        additional_data_behavior=additional_data_behavior,
        **analytics_params,
    )

analytics/analytics_package/setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
setup(
44
name="analytics",
5-
version="4.1.0",
5+
version="4.2.0",
66
packages=["analytics"],
77
install_requires=["matplotlib", "pandas", "numpy", "google-auth-oauthlib", "google-api-python-client", "gspread", "gspread-formatting"],
88
)

0 commit comments

Comments
 (0)