Skip to content

Commit 33c8a68

Browse files
Jonah Patenhunterckx
andauthored
chore: added example code for analytics package (#4393) (#4398)
* chore: added example code for analytics package (#4393) * chore: update required analytics package version (#4393) --------- Co-authored-by: hunterckx <118154470+hunterckx@users.noreply.github.com>
1 parent 98cb089 commit 33c8a68

File tree

5 files changed

+298
-1
lines changed

5 files changed

+298
-1
lines changed

analytics/example/constants.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
# CHANGE THESE VALUES TO GENERATE NEW REPORTS
2+
# The start and end dates of the current month (yyyy-mm-dd)
3+
START_DATE_CURRENT = "2025-01-01"
4+
END_DATE_CURRENT = "2025-01-31"
5+
# The start and end dates of the prior month (yyyy-mm-dd)
6+
START_DATE_PRIOR = "2024-12-01"
7+
END_DATE_PRIOR = "2024-12-31"
8+
# The name of the folder in which to save the report
9+
PARENT_FOLDER_NAME = "Test Folder 2"
10+
11+
# THESE VALUES SHOULD NOT BE CHANGED EXCEPT TO ADD NEW PROPERTIES
12+
# The name of the spreadsheet with the report
13+
SHEET_NAME = "Test Report Table"
14+
# The path to a file containing historic GA data. Provided as an example, it is not used in this script
15+
HISTORIC_UA_DATA_PATH = "./users_over_time_history.json"
16+
# The catalog id for the selected property. In this case, the HCA browser/portal
17+
ANALYTICS_PROPERTY_ID = "361323030"
18+
# The name of the environment variable that contains the path to the client secret file
19+
SECRET_NAME = 'ANALYTICS_CLIENT_SECRET_PATH'
20+
# The start date of the month for which reliable GA4 data is available for each day. Varies between properties
21+
ANALYTICS_START = "2023-07-01"
22+
# The start of the first month in which custom events are available. Multiple values for this will need to be added if more custom events are added
23+
CUSTOM_EVENT_START = "2024-12-01"
24+
# The port to host the OAuth authentication on. Needs to match a configured redirect_uri on the Google developer portal
25+
OAUTH_PORT = 8082
Lines changed: 263 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,263 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": null,
6+
"metadata": {},
7+
"outputs": [],
8+
"source": [
9+
"from analytics import sheets_api as sheets\n",
10+
"from analytics import sheets_elements as elements\n",
11+
"from analytics import api as ga\n",
12+
"from analytics.entities import *\n",
13+
"from constants import *\n",
14+
"import pandas as pd\n",
15+
"import gspread\n",
16+
"import time"
17+
]
18+
},
19+
{
20+
"cell_type": "code",
21+
"execution_count": null,
22+
"metadata": {},
23+
"outputs": [],
24+
"source": [
25+
"# The name of this environment variable will need to be changed based on where your credentials are stored.\n",
26+
"# The name must match the configured value in constants.py\n",
27+
"%env ANALYTICS_CLIENT_SECRET_PATH=../../../do_not_commit_ga4_credentials.json"
28+
]
29+
},
30+
{
31+
"cell_type": "markdown",
32+
"metadata": {},
33+
"source": [
34+
"#### Authenticate and define parameters"
35+
]
36+
},
37+
{
38+
"cell_type": "code",
39+
"execution_count": null,
40+
"metadata": {},
41+
"outputs": [],
42+
"source": [
43+
"# Authentication\n",
44+
"ga_authentication, drive_authentication, sheets_authentication = ga.authenticate(\n",
45+
" SECRET_NAME,\n",
46+
" ga.ga4_service_params,\n",
47+
" ga.drive_service_params,\n",
48+
" ga.sheets_service_params,\n",
49+
" port=OAUTH_PORT\n",
50+
")\n",
51+
"\n",
52+
"date_string = f\"{START_DATE_CURRENT} - {END_DATE_CURRENT}\"\n",
53+
"\n",
54+
"default_params = {\n",
55+
" \"service_system\": ga_authentication,\n",
56+
" \"start_date\": START_DATE_CURRENT,\n",
57+
" \"end_date\": END_DATE_CURRENT,\n",
58+
"}\n",
59+
"\n",
60+
"hca_explorer_params = {\n",
61+
" **default_params,\n",
62+
" \"property\": ANALYTICS_PROPERTY_ID,\n",
63+
"}\n",
64+
"\n",
65+
"hca_explorer_params_from_analytics_start = {\n",
66+
" **hca_explorer_params,\n",
67+
" \"start_date\": ANALYTICS_START,\n",
68+
"}\n",
69+
"hca_explorer_params_with_custom_events = {\n",
70+
" **hca_explorer_params,\n",
71+
" \"start_date\": CUSTOM_EVENT_START,\n",
72+
"}\n",
73+
"\n",
74+
"date_args = [START_DATE_CURRENT, END_DATE_CURRENT, START_DATE_PRIOR, END_DATE_PRIOR]\n",
75+
"# Events associated with bulk and project exports\n",
76+
"export_events = [\n",
77+
" EVENT_INDEX_FILE_MANIFEST_REQUESTED, EVENT_INDEX_FILE_MANIFEST_SELECTED,\n",
78+
" EVENT_INDEX_ANALYZE_IN_TERRA_REQUESTED, EVENT_INDEX_ANALYZE_IN_TERRA_SELECTED,\n",
79+
" EVENT_INDEX_BULK_DOWNLOAD_REQUESTED, EVENT_INDEX_BULK_DOWNLOAD_SELECTED,\n",
80+
"]\n",
81+
"# Misc custom events\n",
82+
"misc_custom_events = [EVENT_HCA_EXPLORE_DATA_CLICKED, EVENT_SUPPORT_REQUEST_CREATED]"
83+
]
84+
},
85+
{
86+
"cell_type": "markdown",
87+
"metadata": {},
88+
"source": [
89+
"#### Get data from the Analytics api as Pandas DataFrames"
90+
]
91+
},
92+
{
93+
"cell_type": "code",
94+
"execution_count": null,
95+
"metadata": {},
96+
"outputs": [],
97+
"source": [
98+
"# Get the monthly traffic summary sheet, containing total users and pageviews for each month\n",
99+
"# additional_data_path and additional_data_behavior are optional parameters that can be specified to add additional historical data\n",
100+
"# additional_data_path must point to a json file in the same format as the users_over_time_history.json file in this folder\n",
101+
"df_monthly_pageviews = elements.get_page_views_over_time_df(hca_explorer_params_from_analytics_start, additional_data_path=HISTORIC_UA_DATA_PATH, additional_data_behavior=elements.ADDITIONAL_DATA_BEHAVIOR.ADD)\n",
102+
"# Get the number of pageviews and active users for each page during the current month\n",
103+
"df_pageviews = elements.get_page_views_change(hca_explorer_params, *date_args)\n",
104+
"# Get the number of clicks to each outbound link\n",
105+
"df_outbound = elements.get_outbound_links_change(hca_explorer_params, *date_args)\n",
106+
"# Get the number of sessions that start on a particular landing page\n",
107+
"df_landing_pages = elements.get_landing_page_change(hca_explorer_params, *date_args)\n",
108+
"# Get the number of visits to each tab on the explorer and the total number of users visiting.\n",
109+
"# Includes users who visit the default tab\n",
110+
"df_entity_selected = elements.get_index_entity_selected_change(hca_explorer_params, *date_args)\n",
111+
"# Get the number of clicks and users for each pagination button\n",
112+
"df_pagination = elements.get_index_entity_table_paginated_change(hca_explorer_params, *date_args)\n",
113+
"# Get the number of clicks and users for each sort button\n",
114+
"df_sort = elements.get_index_entity_table_sorted_change(hca_explorer_params, *date_args)\n",
115+
"# Get the number of clicks and users for each filter name and value\n",
116+
"df_filter = elements.get_index_filter_selected_change(hca_explorer_params, *date_args)\n",
117+
"# Get the number of clicks and users for each file download button on the index page only\n",
118+
"df_file_downloaded = elements.get_index_table_download_change(hca_explorer_params, *date_args)\n",
119+
"# Get the number of times each export event was triggered\n",
120+
"df_export = elements.get_event_count_over_time_df(hca_explorer_params_with_custom_events, export_events)\n",
121+
"# Get the number of times each misc custom event was triggered\n",
122+
"df_misc_custom_events = elements.get_event_count_over_time_df(hca_explorer_params_with_custom_events, misc_custom_events)"
123+
]
124+
},
125+
{
126+
"cell_type": "markdown",
127+
"metadata": {},
128+
"source": [
129+
"#### Save analytics data to sheets"
130+
]
131+
},
132+
{
133+
"cell_type": "code",
134+
"execution_count": null,
135+
"metadata": {},
136+
"outputs": [],
137+
"source": [
138+
"# Currently this example makes too many api queries, so it will fail. \n",
139+
"# This can be resolved by updating the package to use batch updating wherever possible for gspread-formatting\n",
140+
"# and to add exception handling to other gspread calls so they retry after a certain amount of time.\n",
141+
"# Ideally, it would be possible to reduce the number of api calls by batching all updates to the spreadsheet in one\n",
142+
"# api call, but this does not appear to be possible with gspread\n",
143+
"# See https://gspread-formatting.readthedocs.io/en/latest/#batch-mode-for-api-call-efficiency\n",
144+
"# and https://developers.google.com/sheets/api/limits#exponential\n",
145+
"\n",
146+
"dict_spreadsheet = {\n",
147+
" \"Monthly Traffic Summary\": df_monthly_pageviews,\n",
148+
" \"Pageviews\": df_pageviews,\n",
149+
" \"Outbound Links\": df_outbound,\n",
150+
" \"Landing Pages\": df_landing_pages,\n",
151+
" \"Entity Selected\": df_entity_selected,\n",
152+
" \"Pagination\": df_pagination,\n",
153+
" \"Sort\": df_sort,\n",
154+
" \"Filter\": df_filter,\n",
155+
" \"File Downloaded\": df_file_downloaded,\n",
156+
" \"Exports\": df_export,\n",
157+
" \"Misc\" : df_misc_custom_events,\n",
158+
"}\n",
159+
"sheet = sheets.create_sheet_in_folder(\n",
160+
" drive_authentication,\n",
161+
" SHEET_NAME,\n",
162+
" PARENT_FOLDER_NAME,\n",
163+
" override_behavior=sheets.FILE_OVERRIDE_BEHAVIORS.OVERRIDE_IF_IN_SAME_PLACE\n",
164+
" )\n",
165+
"#TODO: need to update package to use batch for gspread_formatting wherever possible, otherwise this won't run\n",
166+
"eventname_totalusers = {\n",
167+
" METRIC_EVENT_COUNT[\"change_alias\"]: sheets.COLUMN_FORMAT_OPTIONS.PERCENT_COLORED,\n",
168+
" METRIC_TOTAL_USERS[\"change_alias\"]: sheets.COLUMN_FORMAT_OPTIONS.PERCENT_COLORED,\n",
169+
"}\n",
170+
"\n",
171+
"sheets.fill_spreadsheet_with_df_dict(\n",
172+
" sheet,\n",
173+
" dict_spreadsheet,\n",
174+
" sheets.FILE_OVERRIDE_BEHAVIORS.OVERRIDE_IF_IN_SAME_PLACE,\n",
175+
" \n",
176+
" column_formatting_options={\n",
177+
" \"Monthly Traffic Summary\": {\n",
178+
" DIMENSION_YEAR_MONTH[\"alias\"]: sheets.COLUMN_FORMAT_OPTIONS.YEAR_MONTH_DATE,\n",
179+
" METRIC_ACTIVE_USERS[\"change_alias\"]: sheets.COLUMN_FORMAT_OPTIONS.PERCENT_COLORED,\n",
180+
" METRIC_PAGE_VIEWS[\"change_alias\"]: sheets.COLUMN_FORMAT_OPTIONS.PERCENT_COLORED,\n",
181+
" },\n",
182+
" \"Outbound Links\": {\n",
183+
" SYNTHETIC_METRIC_CLICKS[\"change_alias\"]: sheets.COLUMN_FORMAT_OPTIONS.PERCENT_COLORED,\n",
184+
" METRIC_TOTAL_USERS[\"change_alias\"]: sheets.COLUMN_FORMAT_OPTIONS.PERCENT_COLORED,\n",
185+
" },\n",
186+
" \"Pageviews\": {\n",
187+
" METRIC_PAGE_VIEWS[\"change_alias\"]: sheets.COLUMN_FORMAT_OPTIONS.PERCENT_COLORED,\n",
188+
" METRIC_TOTAL_USERS[\"change_alias\"]: sheets.COLUMN_FORMAT_OPTIONS.PERCENT_COLORED,\n",
189+
" },\n",
190+
" \"Entity Selected\": eventname_totalusers,\n",
191+
" \"Pagination\": eventname_totalusers,\n",
192+
" \"Sort\": eventname_totalusers,\n",
193+
" \"Filter\": eventname_totalusers,\n",
194+
" \"File Downloaded\": eventname_totalusers,\n",
195+
" \"Exports\": {event[\"change_alias\"]: sheets.COLUMN_FORMAT_OPTIONS.PERCENT_COLORED for event in export_events},\n",
196+
" \"Misc\": {event[\"change_alias\"]: sheets.COLUMN_FORMAT_OPTIONS.PERCENT_COLORED for event in misc_custom_events},\n",
197+
" },\n",
198+
" sheet_formatting_options={\n",
199+
" \"Monthly Traffic Summary\": {\n",
200+
" \"extra_columns\": 1,\n",
201+
" \"extra_columns_width\": 2000\n",
202+
" }\n",
203+
" },\n",
204+
" gspread_update_args={\n",
205+
" \"Filter\": {\n",
206+
" \"value_input_option\": gspread.utils.ValueInputOption.raw\n",
207+
" }\n",
208+
" }\n",
209+
")\n",
210+
"monthly_traffic_worksheet = sheet.worksheet(\"Monthly Traffic Summary\")\n",
211+
"date_range = sheets.WorksheetRange(\n",
212+
" monthly_traffic_worksheet, \n",
213+
" gspread.cell.Cell(1, 1), \n",
214+
" gspread.cell.Cell(df_monthly_pageviews.index.size + 1, 2)\n",
215+
")\n",
216+
"users_range = sheets.WorksheetRange(\n",
217+
" monthly_traffic_worksheet, \n",
218+
" gspread.cell.Cell(1, 2), \n",
219+
" gspread.cell.Cell(df_monthly_pageviews.index.size + 1, 3)\n",
220+
")\n",
221+
"pageviews_range = sheets.WorksheetRange(\n",
222+
" monthly_traffic_worksheet, \n",
223+
" gspread.cell.Cell(1, 3), \n",
224+
" gspread.cell.Cell(df_monthly_pageviews.index.size + 1, 4)\n",
225+
")\n",
226+
"time.sleep(45)\n",
227+
"sheets.add_chart_to_sheet(\n",
228+
" sheets_authentication,\n",
229+
" sheet,\n",
230+
" sheet.worksheet(\"Monthly Traffic Summary\"),\n",
231+
" sheets.CHART_TYPES.LINE,\n",
232+
" date_range,\n",
233+
" [users_range, pageviews_range],\n",
234+
" chart_position=gspread.cell.Cell(1, 6),\n",
235+
" chart_position_offset_x=75,\n",
236+
" chart_position_offset_y=75,\n",
237+
" title=\"Pageviews and Users Over Time\"\n",
238+
")"
239+
]
240+
}
241+
],
242+
"metadata": {
243+
"kernelspec": {
244+
"display_name": "venv",
245+
"language": "python",
246+
"name": "python3"
247+
},
248+
"language_info": {
249+
"codemirror_mode": {
250+
"name": "ipython",
251+
"version": 3
252+
},
253+
"file_extension": ".py",
254+
"mimetype": "text/x-python",
255+
"name": "python",
256+
"nbconvert_exporter": "python",
257+
"pygments_lexer": "ipython3",
258+
"version": "3.12.8"
259+
}
260+
},
261+
"nbformat": 4,
262+
"nbformat_minor": 4
263+
}

analytics/example/readme.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
## Example implementation
2+
This folder represents an example implementation of the analytics package, using version 4.2.0 and the HCA combined property. It utilizes all current event definitions, metrics, and dimensions to provide comprehensive examples, though some events are not defined for other properties.
3+
4+
### Generating Reports
5+
- Update `constants.py` to reflect the date ranges and file name you would like for the report
6+
- Open `./generate_sheets_report.ipynb` using your favorite IDE or by running `jupyter notebook` and selecting it from the browser window that appears
7+
- Run all cells in the Jupyter notebook by pressing the button with two arrows at the top. You will be prompted to log in to your Google Account, which must have access to the relevant analytics property
8+
- Check your Google Drive to ensure that the desired spreadsheet is present
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{"Users":{"1685577600000":7778,"1682899200000":7384,"1680307200000":6467,"1677628800000":6574,"1675209600000":5855,"1672531200000":4470,"1669852800000":4684,"1667260800000":4965,"1664582400000":4891,"1661990400000":4766,"1659312000000":4304,"1656633600000":4112,"1654041600000":4182,"1651363200000":4960,"1648771200000":3899,"1646092800000":3412,"1643673600000":3104,"1640995200000":2836,"1638316800000":2394,"1635724800000":2938,"1633046400000":2584,"1630454400000":2405,"1627776000000":2496,"1625097600000":2954,"1622505600000":1891,"1619827200000":0},"Total Pageviews":{"1685577600000":44146,"1682899200000":48780,"1680307200000":39497,"1677628800000":38763,"1675209600000":30800,"1672531200000":25796,"1669852800000":26073,"1667260800000":28929,"1664582400000":29034,"1661990400000":28711,"1659312000000":27741,"1656633600000":29631,"1654041600000":30890,"1651363200000":33258,"1648771200000":29391,"1646092800000":27114,"1643673600000":22216,"1640995200000":22452,"1638316800000":19856,"1635724800000":24409,"1633046400000":22963,"1630454400000":20424,"1627776000000":21700,"1625097600000":25562,"1622505600000":16683,"1619827200000":0}}

analytics/requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
accessible-pygments==0.0.5
22
alabaster==0.7.16
3-
-e git+https://github.com/DataBiosphere/data-browser.git@e2653f5605cc3220d28299bfc2cc48205c23067d#egg=analytics&subdirectory=analytics/analytics_package
3+
-e git+https://github.com/DataBiosphere/data-browser.git@98cb089c11c503943b349d25532ae182d5e413b5#egg=analytics&subdirectory=analytics/analytics_package
44
anyio==4.7.0
55
appdirs==1.4.4
66
appnope==0.1.4

0 commit comments

Comments
 (0)