Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions analytics/example/constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# CHANGE THESE VALUES TO GENERATE NEW REPORTS
# The start and end dates of the current month (yyyy-mm-dd)
START_DATE_CURRENT = "2025-01-01"
END_DATE_CURRENT = "2025-01-31"
# The start and end dates of the prior month (yyyy-mm-dd)
START_DATE_PRIOR = "2024-12-01"
END_DATE_PRIOR = "2024-12-31"
# The name of the folder in which to save the report
PARENT_FOLDER_NAME = "Test Folder 2"

# THESE VALUES SHOULD NOT BE CHANGED EXCEPT TO ADD NEW PROPERTIES
# The name of the spreadsheet with the report
SHEET_NAME = "Test Report Table"
# The path to a file containing historic GA data. Provided as an example, it is not used in this script
HISTORIC_UA_DATA_PATH = "./users_over_time_history.json"
# The id of the selected analytics property. In this case, the HCA browser/portal
ANALYTICS_PROPERTY_ID = "361323030"
# The name of the environment variable that contains the path to the client secret file
SECRET_NAME = 'ANALYTICS_CLIENT_SECRET_PATH'
# The start date of the first month for which reliable GA4 data is available for each day. Varies between properties
ANALYTICS_START = "2023-07-01"
# The start of the first month in which custom events are available.
# If more custom events are added, a separate start value will be needed for each of them
CUSTOM_EVENT_START = "2024-12-01"
# The port to host the OAuth authentication on. Needs to match a configured redirect_uri on the Google developer portal
OAUTH_PORT = 8082
263 changes: 263 additions & 0 deletions analytics/example/generate_sheets_report.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,263 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from analytics import sheets_api as sheets\n",
"from analytics import sheets_elements as elements\n",
"from analytics import api as ga\n",
"from analytics.entities import *\n",
"from constants import *\n",
"import pandas as pd\n",
"import gspread\n",
"import time"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# The name of this environment variable will need to be changed based on where your credentials are stored.\n",
"# The name must match the configured value in constants.py\n",
"%env ANALYTICS_CLIENT_SECRET_PATH=../../../do_not_commit_ga4_credentials.json"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Authenticate and define parameters"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Authentication\n",
"ga_authentication, drive_authentication, sheets_authentication = ga.authenticate(\n",
" SECRET_NAME,\n",
" ga.ga4_service_params,\n",
" ga.drive_service_params,\n",
" ga.sheets_service_params,\n",
" port=OAUTH_PORT\n",
")\n",
"\n",
"date_string = f\"{START_DATE_CURRENT} - {END_DATE_CURRENT}\"\n",
"\n",
"default_params = {\n",
" \"service_system\": ga_authentication,\n",
" \"start_date\": START_DATE_CURRENT,\n",
" \"end_date\": END_DATE_CURRENT,\n",
"}\n",
"\n",
"hca_explorer_params = {\n",
" **default_params,\n",
" \"property\": ANALYTICS_PROPERTY_ID,\n",
"}\n",
"\n",
"hca_explorer_params_from_analytics_start = {\n",
" **hca_explorer_params,\n",
" \"start_date\": ANALYTICS_START,\n",
"}\n",
"hca_explorer_params_with_custom_events = {\n",
" **hca_explorer_params,\n",
" \"start_date\": CUSTOM_EVENT_START,\n",
"}\n",
"\n",
"date_args = [START_DATE_CURRENT, END_DATE_CURRENT, START_DATE_PRIOR, END_DATE_PRIOR]\n",
"# Events associataed with bulk and project exports\n",
"export_events = [\n",
" EVENT_INDEX_FILE_MANIFEST_REQUESTED, EVENT_INDEX_FILE_MANIFEST_SELECTED,\n",
" EVENT_INDEX_ANALYZE_IN_TERRA_REQUESTED, EVENT_INDEX_ANALYZE_IN_TERRA_SELECTED,\n",
" EVENT_INDEX_BULK_DOWNLOAD_REQUESTED, EVENT_INDEX_BULK_DOWNLOAD_SELECTED,\n",
"]\n",
"# Misc custom events\n",
"misc_custom_events = [EVENT_HCA_EXPLORE_DATA_CLICKED, EVENT_SUPPORT_REQUEST_CREATED]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Get data from the Analytics api as Pandas DataFrames"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Get the monthly traffic summary sheet, containing total users and pageviews for each month\n",
"# additional_data_path and additionl_detail_behavior are optional parameters that can be specified to add additional historical data\n",
"# additional_data_path must point to a json file in the same format as the users_over_time_history.json file in this folder\n",
"df_monthly_pageviews = elements.get_page_views_over_time_df(hca_explorer_params_from_analytics_start, additional_data_path=HISTORIC_UA_DATA_PATH, additional_data_behavior=elements.ADDITIONAL_DATA_BEHAVIOR.ADD)\n",
"# Get the number of pageviews and active users for each page during the current month\n",
"df_pageviews = elements.get_page_views_change(hca_explorer_params, *date_args)\n",
"# Get the number of clicks to each outbound link\n",
"df_outbound = elements.get_outbound_links_change(hca_explorer_params, *date_args)\n",
"# Get the number of sessions that start on a particular landing page\n",
"df_landing_pages = elements.get_landing_page_change(hca_explorer_params, *date_args)\n",
"# Get the number of visits to each tab on the explorer and the total number of users visiting.\n",
"# Includes users who visit the default tab\n",
"df_entity_selected = elements.get_index_entity_selected_change(hca_explorer_params, *date_args)\n",
"# Get the number of clicks and users for each pagination button\n",
"df_pagination = elements.get_index_entity_table_paginated_change(hca_explorer_params, *date_args)\n",
"# Get the number of clicks and users for each sort button\n",
"df_sort = elements.get_index_entity_table_sorted_change(hca_explorer_params, *date_args)\n",
"# Get the number of clicks and users for each filter name and value\n",
"df_filter = elements.get_index_filter_selected_change(hca_explorer_params, *date_args)\n",
"# Get the number of clicks and users for each file download button on the index page only\n",
"df_file_downloaded = elements.get_index_table_download_change(hca_explorer_params, *date_args)\n",
"# Get the number of times each export event was triggered\n",
"df_export = elements.get_event_count_over_time_df(hca_explorer_params_with_custom_events, export_events)\n",
"# Get the number of times each misc custom event was triggered\n",
"df_misc_custom_events = elements.get_event_count_over_time_df(hca_explorer_params_with_custom_events, misc_custom_events)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Save analytics data to sheets"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Currently this example makes too many api queries, so it will fail. \n",
"# This can be resolved by updating the package to use batch updating wherever possible for gspread-formatting\n",
"# and to add exception handling to other gspread calls so they retry after a certain amount of time.\n",
"# Ideally, it would be possible to reduce the number of api calls by batching all updates to the spreadsheet in one\n",
"# api call, but this does not appear to be possible with gspread\n",
"# See https://gspread-formatting.readthedocs.io/en/latest/#batch-mode-for-api-call-efficiency\n",
"# and https://developers.google.com/sheets/api/limits#exponential\n",
"\n",
"dict_spreadsheet = {\n",
" \"Monthly Traffic Summary\": df_monthly_pageviews,\n",
" \"Pageviews\": df_pageviews,\n",
" \"Outbound Links\": df_outbound,\n",
" \"Landing Pages\": df_landing_pages,\n",
" \"Entity Selected\": df_entity_selected,\n",
" \"Pagination\": df_pagination,\n",
" \"Sort\": df_sort,\n",
" \"Filter\": df_filter,\n",
" \"File Downloaded\": df_file_downloaded,\n",
" \"Exports\": df_export,\n",
" \"Misc\" : df_misc_custom_events,\n",
"}\n",
"sheet = sheets.create_sheet_in_folder(\n",
" drive_authentication,\n",
" SHEET_NAME,\n",
" PARENT_FOLDER_NAME,\n",
" override_behavior=sheets.FILE_OVERRIDE_BEHAVIORS.OVERRIDE_IF_IN_SAME_PLACE\n",
" )\n",
"#TODO: need to update package to use batch for gspread_formatting wherever possible, otherwise this won't run\n",
"eventname_totalusers = {\n",
" METRIC_EVENT_COUNT[\"change_alias\"]: sheets.COLUMN_FORMAT_OPTIONS.PERCENT_COLORED,\n",
" METRIC_TOTAL_USERS[\"change_alias\"]: sheets.COLUMN_FORMAT_OPTIONS.PERCENT_COLORED,\n",
"}\n",
"\n",
"sheets.fill_spreadsheet_with_df_dict(\n",
" sheet,\n",
" dict_spreadsheet,\n",
" sheets.FILE_OVERRIDE_BEHAVIORS.OVERRIDE_IF_IN_SAME_PLACE,\n",
" \n",
" column_formatting_options={\n",
" \"Monthly Traffic Summary\": {\n",
" DIMENSION_YEAR_MONTH[\"alias\"]: sheets.COLUMN_FORMAT_OPTIONS.YEAR_MONTH_DATE,\n",
" METRIC_ACTIVE_USERS[\"change_alias\"]: sheets.COLUMN_FORMAT_OPTIONS.PERCENT_COLORED,\n",
" METRIC_PAGE_VIEWS[\"change_alias\"]: sheets.COLUMN_FORMAT_OPTIONS.PERCENT_COLORED,\n",
" },\n",
" \"Outbound Links\": {\n",
" SYNTHETIC_METRIC_CLICKS[\"change_alias\"]: sheets.COLUMN_FORMAT_OPTIONS.PERCENT_COLORED,\n",
" METRIC_TOTAL_USERS[\"change_alias\"]: sheets.COLUMN_FORMAT_OPTIONS.PERCENT_COLORED,\n",
" },\n",
" \"Pageviews\": {\n",
" METRIC_PAGE_VIEWS[\"change_alias\"]: sheets.COLUMN_FORMAT_OPTIONS.PERCENT_COLORED,\n",
" METRIC_TOTAL_USERS[\"change_alias\"]: sheets.COLUMN_FORMAT_OPTIONS.PERCENT_COLORED,\n",
" },\n",
" \"Entity Selected\": eventname_totalusers,\n",
" \"Pagination\": eventname_totalusers,\n",
" \"Sort\": eventname_totalusers,\n",
" \"Filter\": eventname_totalusers,\n",
" \"File Downloaded\": eventname_totalusers,\n",
" \"Exports\": {event[\"change_alias\"]: sheets.COLUMN_FORMAT_OPTIONS.PERCENT_COLORED for event in export_events},\n",
" \"Misc\": {event[\"change_alias\"]: sheets.COLUMN_FORMAT_OPTIONS.PERCENT_COLORED for event in misc_custom_events},\n",
" },\n",
" sheet_formatting_options={\n",
" \"Monthly Traffic Summary\": {\n",
" \"extra_columns\": 1,\n",
" \"extra_columns_width\": 2000\n",
" }\n",
" },\n",
" gspread_update_args={\n",
" \"Filter\": {\n",
" \"value_input_option\": gspread.utils.ValueInputOption.raw\n",
" }\n",
" }\n",
")\n",
"monthly_traffic_worksheet = sheet.worksheet(\"Monthly Traffic Summary\")\n",
"date_range = sheets.WorksheetRange(\n",
" monthly_traffic_worksheet, \n",
" gspread.cell.Cell(1, 1), \n",
" gspread.cell.Cell(df_monthly_pageviews.index.size + 1, 2)\n",
")\n",
"users_range = sheets.WorksheetRange(\n",
" monthly_traffic_worksheet, \n",
" gspread.cell.Cell(1, 2), \n",
" gspread.cell.Cell(df_monthly_pageviews.index.size + 1, 3)\n",
")\n",
"pageviews_range = sheets.WorksheetRange(\n",
" monthly_traffic_worksheet, \n",
" gspread.cell.Cell(1, 3), \n",
" gspread.cell.Cell(df_monthly_pageviews.index.size + 1, 4)\n",
")\n",
"time.sleep(45)\n",
"sheets.add_chart_to_sheet(\n",
" sheets_authentication,\n",
" sheet,\n",
" sheet.worksheet(\"Monthly Traffic Summary\"),\n",
" sheets.CHART_TYPES.LINE,\n",
" date_range,\n",
" [users_range, pageviews_range],\n",
" chart_position=gspread.cell.Cell(1, 6),\n",
" chart_position_offset_x=75,\n",
" chart_position_offset_y=75,\n",
" title=\"Pageviews and Users Over Time\"\n",
")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.8"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
8 changes: 8 additions & 0 deletions analytics/example/readme.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
## Example implementation
This folder represents an example implementation of the analytics package, using version 4.2.0 and the HCA combined property. It utilizes all current event definitions, metrics, and dimensions to provide comprehensive examples, though some events are not defined for other properties.

### Generating Reports
- Update `constants.py` to reflect the date ranges and file name you would like for the report
- Open `./generate_sheets_report.ipynb` using your favorite IDE or by running `jupyter notebook` and selecting it from the browser window that appears
- Run all cells in the Jupyter notebook by pressing the button with two arrows at the top. You will be prompted to log in to your Google Account, which must have access to the relevant analytics property
- Check your Google Drive to ensure that the desired spreadsheet is present
1 change: 1 addition & 0 deletions analytics/example/users_over_time_history.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"Users":{"1685577600000":7778,"1682899200000":7384,"1680307200000":6467,"1677628800000":6574,"1675209600000":5855,"1672531200000":4470,"1669852800000":4684,"1667260800000":4965,"1664582400000":4891,"1661990400000":4766,"1659312000000":4304,"1656633600000":4112,"1654041600000":4182,"1651363200000":4960,"1648771200000":3899,"1646092800000":3412,"1643673600000":3104,"1640995200000":2836,"1638316800000":2394,"1635724800000":2938,"1633046400000":2584,"1630454400000":2405,"1627776000000":2496,"1625097600000":2954,"1622505600000":1891,"1619827200000":0},"Total Pageviews":{"1685577600000":44146,"1682899200000":48780,"1680307200000":39497,"1677628800000":38763,"1675209600000":30800,"1672531200000":25796,"1669852800000":26073,"1667260800000":28929,"1664582400000":29034,"1661990400000":28711,"1659312000000":27741,"1656633600000":29631,"1654041600000":30890,"1651363200000":33258,"1648771200000":29391,"1646092800000":27114,"1643673600000":22216,"1640995200000":22452,"1638316800000":19856,"1635724800000":24409,"1633046400000":22963,"1630454400000":20424,"1627776000000":21700,"1625097600000":25562,"1622505600000":16683,"1619827200000":0}}
2 changes: 1 addition & 1 deletion analytics/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
accessible-pygments==0.0.5
alabaster==0.7.16
-e git+https://github.com/DataBiosphere/data-browser.git@e2653f5605cc3220d28299bfc2cc48205c23067d#egg=analytics&subdirectory=analytics/analytics_package
-e git+https://github.com/DataBiosphere/data-browser.git@98cb089c11c503943b349d25532ae182d5e413b5#egg=analytics&subdirectory=analytics/analytics_package
anyio==4.7.0
appdirs==1.4.4
appnope==0.1.4
Expand Down
Loading